From 30acef00eadcf70d021d884baf8d2f7ccd9769c5 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Thu, 20 Apr 2017 15:59:25 -0500 Subject: [PATCH 01/33] Add fn api runner. --- sdks/python/apache_beam/runners/__init__.py | 30 -- sdks/python/apache_beam/runners/data_plane.py | 283 +++++++++++ .../apache_beam/runners/data_plane_test.py | 135 ++++++ sdks/python/apache_beam/runners/fn_harness.py | 59 +++ .../python/apache_beam/runners/log_handler.py | 97 ++++ .../apache_beam/runners/log_handler_test.py | 100 ++++ .../python/apache_beam/runners/sdk_harness.py | 446 +++++++++++++++++ .../apache_beam/runners/sdk_harness_runner.py | 457 ++++++++++++++++++ .../runners/sdk_harness_runner_test.py | 29 ++ .../apache_beam/runners/sdk_harness_test.py | 168 +++++++ .../apache_beam/runners/worker_runner_base.py | 448 +++++++++++++++++ .../runners/worker_runner_base_test.py | 185 +++++++ 12 files changed, 2407 insertions(+), 30 deletions(-) create mode 100644 sdks/python/apache_beam/runners/data_plane.py create mode 100644 sdks/python/apache_beam/runners/data_plane_test.py create mode 100644 sdks/python/apache_beam/runners/fn_harness.py create mode 100644 sdks/python/apache_beam/runners/log_handler.py create mode 100644 sdks/python/apache_beam/runners/log_handler_test.py create mode 100644 sdks/python/apache_beam/runners/sdk_harness.py create mode 100644 sdks/python/apache_beam/runners/sdk_harness_runner.py create mode 100644 sdks/python/apache_beam/runners/sdk_harness_runner_test.py create mode 100644 sdks/python/apache_beam/runners/sdk_harness_test.py create mode 100644 sdks/python/apache_beam/runners/worker_runner_base.py create mode 100644 sdks/python/apache_beam/runners/worker_runner_base_test.py diff --git a/sdks/python/apache_beam/runners/__init__.py b/sdks/python/apache_beam/runners/__init__.py index 2b93c3085948..e69de29bb2d1 100644 --- a/sdks/python/apache_beam/runners/__init__.py +++ b/sdks/python/apache_beam/runners/__init__.py @@ -1,30 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -"""Runner objects execute a Pipeline. - -This package defines runners, which are used to execute a pipeline. 
-""" - -from apache_beam.runners.direct.direct_runner import DirectRunner -from apache_beam.runners.direct.direct_runner import EagerRunner -from apache_beam.runners.runner import PipelineRunner -from apache_beam.runners.runner import PipelineState -from apache_beam.runners.runner import create_runner - -from apache_beam.runners.dataflow.dataflow_runner import DataflowRunner -from apache_beam.runners.dataflow.test_dataflow_runner import TestDataflowRunner diff --git a/sdks/python/apache_beam/runners/data_plane.py b/sdks/python/apache_beam/runners/data_plane.py new file mode 100644 index 000000000000..892f33c69bb1 --- /dev/null +++ b/sdks/python/apache_beam/runners/data_plane.py @@ -0,0 +1,283 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Implementation of DataChannels for communicating across the data plane.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import abc +import collections +import logging +import Queue as queue +import threading + +from apache_beam.coders import coder_impl +from dataflow_worker.fn_harness import beam_fn_api_pb2 +import grpc + + +class ClosableOutputStream(type(coder_impl.create_OutputStream())): + """A Outputstream for use with CoderImpls that has a close() method.""" + + def __init__(self, close_callback=None): + self._close_callback = close_callback + + def close(self): + if self._close_callback: + self._close_callback(self.get()) + + +class DataChannel(object): + """Represents a channel for reading and writing data over the data plane. + + Read from this channel with the input_elements method:: + + for elements_data in data_channel.input_elements(instruction_id, targets): + [process elements_data] + + Write to this channel using the output_stream method:: + + out1 = data_channel.output_stream(instruction_id, target1) + out1.write(...) + out1.close() + + When all data for all instructions is written, close the channel:: + + data_channel.close() + """ + + __metaclass__ = abc.ABCMeta + + @abc.abstractmethod + def input_elements(self, instruction_id, expected_targets): + """Returns an iterable of all Element.Data bundles for instruction_id. + + This iterable terminates only once the full set of data has been recieved + for each of the expected targets. It may block waiting for more data. + + Args: + instruction_id: which instruction the results must belong to + expected_targets: which targets to wait on for completion + """ + raise NotImplementedError(type(self)) + + @abc.abstractmethod + def output_stream(self, instruction_id, target): + """Returns an output stream writing elements to target. + + Args: + instruction_id: which instruction this stream belongs to + target: the target of the returned stream + """ + raise NotImplementedError(type(self)) + + @abc.abstractmethod + def close(self): + """Closes this channel, indicating that all data has been written. + + Data can continue to be read. 
+ + If this channel is shared by many instructions, should only be called on + worker shutdown. + """ + raise NotImplementedError(type(self)) + + +class InMemoryDataChannel(DataChannel): + """An in-memory implementation of a DataChannel. + + This channel is two-sided. What is written to one side is read by the other. + The inverse() method returns the other side of a instance. + """ + + def __init__(self, inverse=None): + self._inputs = [] + self._inverse = inverse or InMemoryDataChannel(self) + + def inverse(self): + return self._inverse + + def input_elements(self, instruction_id, unused_expected_targets=None): + for data in self._inputs: + if data.instruction_reference == instruction_id: + yield data + + def output_stream(self, instruction_id, target): + def add_to_inverse_output(data): + self._inverse._inputs.append( # pylint: disable=protected-access + beam_fn_api_pb2.Elements.Data( + instruction_reference=instruction_id, + target=target, + data=data)) + return ClosableOutputStream(add_to_inverse_output) + + def close(self): + pass + + +class _GrpcDataChannel(DataChannel): + """Base class for implementing a BeamFnData-based DataChannel.""" + + _WRITES_FINISHED = object() + + def __init__(self): + self._to_send = queue.Queue() + self._received = collections.defaultdict(queue.Queue) + self._receive_lock = threading.Lock() + self._reads_finished = threading.Event() + + def close(self): + self._to_send.put(self._WRITES_FINISHED) + + def wait(self, timeout=None): + self._reads_finished.wait(timeout) + + def _receiving_queue(self, instruction_id): + with self._receive_lock: + return self._received[instruction_id] + + def input_elements(self, instruction_id, expected_targets): + received = self._receiving_queue(instruction_id) + done_targets = [] + while len(done_targets) < len(expected_targets): + data = received.get() + if not data.data and data.target in expected_targets: + done_targets.append(data.target) + else: + assert data.target not in done_targets + yield data + + def output_stream(self, instruction_id, target): + def add_to_send_queue(data): + self._to_send.put( + beam_fn_api_pb2.Elements.Data( + instruction_reference=instruction_id, + target=target, + data=data)) + self._to_send.put( + beam_fn_api_pb2.Elements.Data( + instruction_reference=instruction_id, + target=target, + data='')) + return ClosableOutputStream(add_to_send_queue) + + def _write_outputs(self): + done = False + while not done: + data = [self._to_send.get()] + try: + # Coalesce up to 100 other items. + for _ in range(100): + data.append(self._to_send.get_nowait()) + except queue.Empty: + pass + if data[-1] is self._WRITES_FINISHED: + done = True + data.pop() + if data: + yield beam_fn_api_pb2.Elements(data=data) + + def _read_inputs(self, elements_iterator): + # TODO(robertwb): Pushback/throttling to avoid unbounded buffering. 
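+    # Until such throttling exists, every Elements.Data message is queued as
+    # soon as it arrives, so a sender that outpaces the consuming instruction
+    # can grow the per-instruction receive queues without bound.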
+ try: + for elements in elements_iterator: + for data in elements.data: + self._receiving_queue(data.instruction_reference).put(data) + except: # pylint: disable=broad-except + logging.exception('Failed to read inputs in the data plane') + raise + finally: + self._reads_finished.set() + + def _start_reader(self, elements_iterator): + reader = threading.Thread( + target=lambda: self._read_inputs(elements_iterator), + name='read_grpc_client_inputs') + reader.daemon = True + reader.start() + + +class GrpcClientDataChannel(_GrpcDataChannel): + """A DataChannel wrapping the client side of a BeamFnData connection.""" + + def __init__(self, data_stub): + super(GrpcClientDataChannel, self).__init__() + self._start_reader(data_stub.Data(self._write_outputs())) + + +class GrpcServerDataChannel( + beam_fn_api_pb2.BeamFnDataServicer, _GrpcDataChannel): + """A DataChannel wrapping the server side of a BeamFnData connection.""" + + def Data(self, elements_iterator, context): + self._start_reader(elements_iterator) + for elements in self._write_outputs(): + yield elements + + +class DataChannelFactory(object): + """An abstract factory for creating ``DataChannel``.""" + + __metaclass__ = abc.ABCMeta + + @abc.abstractmethod + def create_data_channel(self, function_spec): + """Returns a ``DataChannel`` from the given function_spec.""" + raise NotImplementedError(type(self)) + + @abc.abstractmethod + def close(self): + """Close all channels that this factory owns.""" + raise NotImplementedError(type(self)) + + +class GrpcClientDataChannelFactory(DataChannelFactory): + """A factory for ``GrpcClientDataChannel``. + + Caches the created channels by ``data descriptor url``. + """ + + def __init__(self): + self._data_channel_cache = {} + + def create_data_channel(self, function_spec): + remote_grpc_port = beam_fn_api_pb2.RemoteGrpcPort() + function_spec.data.Unpack(remote_grpc_port) + url = remote_grpc_port.api_service_descriptor.url + if url not in self._data_channel_cache: + logging.info('Creating channel for %s', url) + grpc_channel = grpc.insecure_channel(url) + self._data_channel_cache[url] = GrpcClientDataChannel( + beam_fn_api_pb2.BeamFnDataStub(grpc_channel)) + return self._data_channel_cache[url] + + def close(self): + logging.info('Closing all cached grpc data channels.') + for _, channel in self._data_channel_cache.items(): + channel.close() + self._data_channel_cache.clear() + + +class InMemoryDataChannelFactory(DataChannelFactory): + """A singleton factory for ``InMemoryDataChannel``.""" + + def __init__(self, in_memory_data_channel): + self._in_memory_data_channel = in_memory_data_channel + + def create_data_channel(self, unused_function_spec): + return self._in_memory_data_channel + + def close(self): + pass diff --git a/sdks/python/apache_beam/runners/data_plane_test.py b/sdks/python/apache_beam/runners/data_plane_test.py new file mode 100644 index 000000000000..abd04171d7be --- /dev/null +++ b/sdks/python/apache_beam/runners/data_plane_test.py @@ -0,0 +1,135 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for dataflow_worker.fn_harness.data_plane.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import sys +import threading +import unittest + +from concurrent import futures +from dataflow_worker.fn_harness import beam_fn_api_pb2 +from dataflow_worker.fn_harness import data_plane +import grpc +import portpicker + + +def timeout(timeout_secs): + def decorate(fn): + exc_info = [] + def wrapper(*args, **kwargs): + def call_fn(): + try: + fn(*args, **kwargs) + except: # pylint: disable=bare-except + exc_info[:] = sys.exc_info() + thread = threading.Thread(target=call_fn) + thread.daemon = True + thread.start() + thread.join(timeout_secs) + if exc_info: + t, v, tb = exc_info # pylint: disable=unbalanced-tuple-unpacking + raise t, v, tb + assert not thread.is_alive(), 'timed out after %s seconds' % timeout_secs + return wrapper + return decorate + + +class DataChannelTest(unittest.TestCase): + + @timeout(5) + def test_grpc_data_channel(self): + data_channel_service = data_plane.GrpcServerDataChannel() + + test_port = portpicker.pick_unused_port() + server = grpc.server(futures.ThreadPoolExecutor(max_workers=2)) + beam_fn_api_pb2.add_BeamFnDataServicer_to_server( + data_channel_service, server) + server.add_insecure_port('[::]:%s' % test_port) + server.start() + + data_channel_stub = beam_fn_api_pb2.BeamFnDataStub( + grpc.insecure_channel('localhost:%s' % test_port)) + data_channel_client = data_plane.GrpcClientDataChannel(data_channel_stub) + + try: + self._data_channel_test(data_channel_service, data_channel_client) + finally: + data_channel_client.close() + data_channel_service.close() + data_channel_client.wait() + data_channel_service.wait() + + def test_in_memory_data_channel(self): + channel = data_plane.InMemoryDataChannel() + self._data_channel_test(channel, channel.inverse()) + + def _data_channel_test(self, server, client): + self._data_channel_test_one_direction(server, client) + self._data_channel_test_one_direction(client, server) + + def _data_channel_test_one_direction(self, from_channel, to_channel): + def send(instruction_id, target, data): + stream = from_channel.output_stream(instruction_id, target) + stream.write(data) + stream.close() + target_1 = beam_fn_api_pb2.Target( + primitive_transform_reference='1', + name='out') + target_2 = beam_fn_api_pb2.Target( + primitive_transform_reference='2', + name='out') + + # Single write. + send('0', target_1, 'abc') + self.assertEqual( + list(to_channel.input_elements('0', [target_1])), + [beam_fn_api_pb2.Elements.Data( + instruction_reference='0', + target=target_1, + data='abc')]) + + # Multiple interleaved writes to multiple instructions. 
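+    # Elements for different instruction ids land in independent queues, so
+    # reading instruction '1' below must not consume the data already sent
+    # for instruction '2'.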
+ target_2 = beam_fn_api_pb2.Target( + primitive_transform_reference='2', + name='out') + + send('1', target_1, 'abc') + send('2', target_1, 'def') + self.assertEqual( + list(to_channel.input_elements('1', [target_1])), + [beam_fn_api_pb2.Elements.Data( + instruction_reference='1', + target=target_1, + data='abc')]) + send('2', target_2, 'ghi') + self.assertEqual( + list(to_channel.input_elements('2', [target_1, target_2])), + [beam_fn_api_pb2.Elements.Data( + instruction_reference='2', + target=target_1, + data='def'), + beam_fn_api_pb2.Elements.Data( + instruction_reference='2', + target=target_2, + data='ghi')]) + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + unittest.main() diff --git a/sdks/python/apache_beam/runners/fn_harness.py b/sdks/python/apache_beam/runners/fn_harness.py new file mode 100644 index 000000000000..3b4159782ca4 --- /dev/null +++ b/sdks/python/apache_beam/runners/fn_harness.py @@ -0,0 +1,59 @@ +# Copyright 2017 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""SDK Fn Harness entry point.""" + +import logging +import os +import sys + +from dataflow_worker.fn_harness import beam_fn_api_pb2 +from dataflow_worker.fn_harness.log_handler import FnApiLogRecordHandler +from dataflow_worker.fn_harness.sdk_harness import SdkHarness +import grpc + +from google.protobuf import text_format + + +def main(unused_argv): + """Main entry point for SDK Fn Harness.""" + logging_service_descriptor = beam_fn_api_pb2.ApiServiceDescriptor() + text_format.Merge(os.environ['LOGGING_API_SERVICE_DESCRIPTOR'], + logging_service_descriptor) + + # Send all logs to the runner. + fn_log_handler = FnApiLogRecordHandler(logging_service_descriptor) + # TODO(vikasrk): This should be picked up from pipeline options. + logging.getLogger().setLevel(logging.INFO) + logging.getLogger().addHandler(fn_log_handler) + + try: + logging.info('Python sdk harness started.') + service_descriptor = beam_fn_api_pb2.ApiServiceDescriptor() + text_format.Merge(os.environ['CONTROL_API_SERVICE_DESCRIPTOR'], + service_descriptor) + # TODO(robertwb): Support credentials. + assert not service_descriptor.oauth2_client_credentials_grant.url + channel = grpc.insecure_channel(service_descriptor.url) + SdkHarness(channel).run() + logging.info('Python sdk harness exiting.') + except: # pylint: disable=broad-except + logging.exception('Python sdk harness failed: ') + raise + finally: + fn_log_handler.close() + + +if __name__ == '__main__': + main(sys.argv) diff --git a/sdks/python/apache_beam/runners/log_handler.py b/sdks/python/apache_beam/runners/log_handler.py new file mode 100644 index 000000000000..23e7fa940c2e --- /dev/null +++ b/sdks/python/apache_beam/runners/log_handler.py @@ -0,0 +1,97 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Beam fn API log handler.""" + +import logging +import math +import Queue as queue +import threading + +from dataflow_worker.fn_harness import beam_fn_api_pb2 +import grpc + + +class FnApiLogRecordHandler(logging.Handler): + """A handler that writes log records to the fn API.""" + + # Maximum number of log entries in a single stream request. + _MAX_BATCH_SIZE = 1000 + # Used to indicate the end of stream. + _FINISHED = object() + + # Mapping from logging levels to LogEntry levels. + LOG_LEVEL_MAP = { + logging.FATAL: beam_fn_api_pb2.LogEntry.CRITICAL, + logging.ERROR: beam_fn_api_pb2.LogEntry.ERROR, + logging.WARNING: beam_fn_api_pb2.LogEntry.ERROR, + logging.INFO: beam_fn_api_pb2.LogEntry.INFO, + logging.DEBUG: beam_fn_api_pb2.LogEntry.DEBUG + } + + def __init__(self, log_service_descriptor): + super(FnApiLogRecordHandler, self).__init__() + self._log_channel = grpc.insecure_channel(log_service_descriptor.url) + self._logging_stub = beam_fn_api_pb2.BeamFnLoggingStub(self._log_channel) + self._log_entry_queue = queue.Queue() + + log_control_messages = self._logging_stub.Logging(self._write_log_entries()) + self._reader = threading.Thread( + target=lambda: self._read_log_control_messages(log_control_messages), + name='read_log_control_messages') + self._reader.daemon = True + self._reader.start() + + def emit(self, record): + log_entry = beam_fn_api_pb2.LogEntry() + log_entry.severity = self.LOG_LEVEL_MAP[record.levelno] + log_entry.message = self.format(record) + log_entry.thread = record.threadName + log_entry.log_location = record.module + '.' + record.funcName + (fraction, seconds) = math.modf(record.created) + nanoseconds = 1e9 * fraction + log_entry.timestamp.seconds = int(seconds) + log_entry.timestamp.nanos = int(nanoseconds) + self._log_entry_queue.put(log_entry) + + def close(self): + """Flush out all existing log entries and unregister this handler.""" + # Acquiring the handler lock ensures ``emit`` is not run until the lock is + # released. + self.acquire() + self._log_entry_queue.put(self._FINISHED) + # wait on server to close. + self._reader.join() + self.release() + # Unregister this handler. + super(FnApiLogRecordHandler, self).close() + + def _write_log_entries(self): + done = False + while not done: + log_entries = [self._log_entry_queue.get()] + try: + for _ in range(self._MAX_BATCH_SIZE): + log_entries.append(self._log_entry_queue.get_nowait()) + except queue.Empty: + pass + if log_entries[-1] is self._FINISHED: + done = True + log_entries.pop() + if log_entries: + yield beam_fn_api_pb2.LogEntry.List(log_entries=log_entries) + + def _read_log_control_messages(self, log_control_iterator): + # TODO(vikasrk): Handle control messages. 
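+    # For now simply drain the iterator; this keeps the bidirectional
+    # Logging stream open until the server half-closes it.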
+ for _ in log_control_iterator: + pass diff --git a/sdks/python/apache_beam/runners/log_handler_test.py b/sdks/python/apache_beam/runners/log_handler_test.py new file mode 100644 index 000000000000..3e6fc097fe6a --- /dev/null +++ b/sdks/python/apache_beam/runners/log_handler_test.py @@ -0,0 +1,100 @@ +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +import unittest + +from concurrent import futures +from dataflow_worker.fn_harness import beam_fn_api_pb2 +from dataflow_worker.fn_harness import log_handler +import grpc +import portpicker + + +class BeamFnLoggingServicer(beam_fn_api_pb2.BeamFnLoggingServicer): + + def __init__(self): + self.log_records_received = [] + + def Logging(self, request_iterator, context): + + for log_record in request_iterator: + self.log_records_received.append(log_record) + + yield beam_fn_api_pb2.LogControl() + + +class FnApiLogRecordHandlerTest(unittest.TestCase): + + def setUp(self): + self.test_logging_service = BeamFnLoggingServicer() + self.test_port = portpicker.pick_unused_port() + self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + beam_fn_api_pb2.add_BeamFnLoggingServicer_to_server( + self.test_logging_service, self.server) + self.server.add_insecure_port('[::]:%s' % self.test_port) + self.server.start() + + self.logging_service_descriptor = beam_fn_api_pb2.ApiServiceDescriptor() + self.logging_service_descriptor.url = 'localhost:%s' % self.test_port + self.fn_log_handler = log_handler.FnApiLogRecordHandler( + self.logging_service_descriptor) + logging.getLogger().setLevel(logging.INFO) + logging.getLogger().addHandler(self.fn_log_handler) + + def tearDown(self): + # wait upto 5 seconds. + self.server.stop(5) + + def _verify_fn_log_handler(self, num_log_entries): + msg = 'Testing fn logging' + logging.debug('Debug Message 1') + for idx in range(num_log_entries): + logging.info('%s: %s', msg, idx) + logging.debug('Debug Message 2') + + # Wait for logs to be sent to server. + self.fn_log_handler.close() + + num_received_log_entries = 0 + for outer in self.test_logging_service.log_records_received: + for log_entry in outer.log_entries: + self.assertEqual(beam_fn_api_pb2.LogEntry.INFO, log_entry.severity) + self.assertEqual('%s: %s' % (msg, num_received_log_entries), + log_entry.message) + self.assertEqual(u'log_handler_test._verify_fn_log_handler', + log_entry.log_location) + self.assertGreater(log_entry.timestamp.seconds, 0) + self.assertGreater(log_entry.timestamp.nanos, 0) + num_received_log_entries += 1 + + self.assertEqual(num_received_log_entries, num_log_entries) + +# Test cases. 
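+# Each entry below becomes a test_<name> method (see _create_test), covering
+# a single partial batch, an exact multiple of the batch size, and several
+# full batches plus a remainder.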
+data = { + 'one_batch': log_handler.FnApiLogRecordHandler._MAX_BATCH_SIZE - 47, + 'exact_multiple': log_handler.FnApiLogRecordHandler._MAX_BATCH_SIZE, + 'multi_batch': log_handler.FnApiLogRecordHandler._MAX_BATCH_SIZE * 3 + 47 +} + + +def _create_test(name, num_logs): + setattr(FnApiLogRecordHandlerTest, 'test_%s' % name, + lambda self: self._verify_fn_log_handler(num_logs)) + + +if __name__ == '__main__': + for test_name, num_logs_entries in data.iteritems(): + _create_test(test_name, num_logs_entries) + + unittest.main() diff --git a/sdks/python/apache_beam/runners/sdk_harness.py b/sdks/python/apache_beam/runners/sdk_harness.py new file mode 100644 index 000000000000..c7fcccbc3a93 --- /dev/null +++ b/sdks/python/apache_beam/runners/sdk_harness.py @@ -0,0 +1,446 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""SDK harness for executing Python Fns via the Fn API.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import json +import logging +import Queue as queue +import threading +import traceback +import zlib + +from apache_beam.coders import coder_impl +from apache_beam.coders import WindowedValueCoder +from apache_beam.internal import pickler +from apache_beam.runners.dataflow.native_io import iobase +from apache_beam.utils import counters +from dataflow_worker import operation_specs +from dataflow_worker import operations +from dataflow_worker import statesampler +from dataflow_worker.fn_harness import beam_fn_api_pb2 +from dataflow_worker.fn_harness.data_plane import GrpcClientDataChannelFactory +import dill + +from google3.google.protobuf import wrappers_pb2 + + +DATA_INPUT_URN = 'urn:org.apache.beam:source:runner:0.1' +DATA_OUTPUT_URN = 'urn:org.apache.beam:sink:runner:0.1' +IDENTITY_DOFN_URN = 'urn:org.apache.beam:dofn:identity:0.1' +PYTHON_ITERABLE_VIEWFN_URN = 'urn:org.apache.beam:viewfn:iterable:python:0.1' +PYTHON_CODER_URN = 'urn:org.apache.beam:coder:python:0.1' +# TODO(vikasrk): Fix this once runner sends appropriate python urns. +PYTHON_DOFN_URN = 'urn:org.apache.beam:dofn:java:0.1' +PYTHON_SOURCE_URN = 'urn:org.apache.beam:source:java:0.1' + + +class RunnerIOOperation(operations.Operation): + """Common baseclass for runner harness IO operations.""" + + def __init__(self, operation_name, step_name, consumers, counter_factory, + state_sampler, windowed_coder, target, data_channel): + super(RunnerIOOperation, self).__init__( + operation_name, None, counter_factory, state_sampler) + self.windowed_coder = windowed_coder + self.step_name = step_name + # target represents the consumer for the bytes in the data plane for a + # DataInputOperation or a producer of these bytes for a DataOutputOperation. 
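+    # A target is identified by (primitive transform id, output name), e.g.
+    # beam_fn_api_pb2.Target(primitive_transform_reference='1', name='out')
+    # (illustrative values).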
+ self.target = target + self.data_channel = data_channel + for _, consumer_ops in consumers.items(): + for consumer in consumer_ops: + self.add_receiver(consumer, 0) + + +class DataOutputOperation(RunnerIOOperation): + """A sink-like operation that gathers outputs to be sent back to the runner. + """ + + def set_output_stream(self, output_stream): + self.output_stream = output_stream + + def process(self, windowed_value): + self.windowed_coder.get_impl().encode_to_stream( + windowed_value, self.output_stream, True) + + def finish(self): + self.output_stream.close() + super(DataOutputOperation, self).finish() + + +class DataInputOperation(RunnerIOOperation): + """A source-like operation that gathers input from the runner. + """ + + def __init__(self, operation_name, step_name, consumers, counter_factory, + state_sampler, windowed_coder, input_target, data_channel): + super(DataInputOperation, self).__init__( + operation_name, step_name, consumers, counter_factory, state_sampler, + windowed_coder, target=input_target, data_channel=data_channel) + # We must do this manually as we don't have a spec or spec.output_coders. + self.receivers = [ + operations.ConsumerSet(self.counter_factory, self.step_name, 0, + consumers.itervalues().next(), + self.windowed_coder)] + + def process(self, windowed_value): + self.output(windowed_value) + + def process_encoded(self, encoded_windowed_values): + input_stream = coder_impl.create_InputStream(encoded_windowed_values) + while input_stream.size() > 0: + decoded_value = self.windowed_coder.get_impl().decode_from_stream( + input_stream, True) + self.output(decoded_value) + + +# TODO(robertwb): Revise side input API to not be in terms of native sources. +# This will enable lookups, but there's an open question as to how to handle +# custom sources without forcing intermediate materialization. This seems very +# related to the desire to inject key and window preserving [Splittable]DoFns +# into the view computation. +class SideInputSource(iobase.NativeSource, iobase.NativeSourceReader): + """A 'source' for reading side inputs via state API calls. + """ + + def __init__(self, state_handler, state_key, coder): + self._state_handler = state_handler + self._state_key = state_key + self._coder = coder + + def reader(self): + return self + + @property + def returns_windowed_values(self): + return True + + def __enter__(self): + return self + + def __exit__(self, *exn_info): + pass + + def __iter__(self): + # TODO(robertwb): Support pagination. 
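+    # Until then, all bytes for the side input are fetched with a single
+    # Get() call, and elements are decoded one at a time as this generator
+    # is advanced.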
+ input_stream = coder_impl.create_InputStream( + self._state_handler.Get(self._state_key).data) + while input_stream.size() > 0: + yield self._coder.get_impl().decode_from_stream(input_stream, True) + + +def unpack_and_deserialize_py_fn(function_spec): + """Returns unpacked and deserialized object from function spec proto.""" + return pickler.loads(unpack_function_spec_data(function_spec)) + + +def unpack_function_spec_data(function_spec): + """Returns unpacked data from function spec proto.""" + data = wrappers_pb2.BytesValue() + function_spec.data.Unpack(data) + return data.value + + +# pylint: disable=redefined-builtin +def serialize_and_pack_py_fn(fn, urn, id=None): + """Returns serialized and packed function in a function spec proto.""" + return pack_function_spec_data(pickler.dumps(fn), urn, id) +# pylint: enable=redefined-builtin + + +# pylint: disable=redefined-builtin +def pack_function_spec_data(value, urn, id=None): + """Returns packed data in a function spec proto.""" + data = wrappers_pb2.BytesValue(value=value) + fn_proto = beam_fn_api_pb2.FunctionSpec(urn=urn) + fn_proto.data.Pack(data) + if id: + fn_proto.id = id + return fn_proto +# pylint: enable=redefined-builtin + + +# TODO(vikasrk): move this method to ``coders.py`` in the SDK. +def load_compressed(compressed_data): + """Returns a decompressed and deserialized python object.""" + # Note: SDK uses ``pickler.dumps`` to serialize certain python objects + # (like sources), which involves serialization, compression and base64 + # encoding. We cannot directly use ``pickler.loads`` for + # deserialization, as the runner would have already base64 decoded the + # data. So we only need to decompress and deserialize. + + data = zlib.decompress(compressed_data) + try: + return dill.loads(data) + except Exception: # pylint: disable=broad-except + dill.dill._trace(True) # pylint: disable=protected-access + return dill.loads(data) + finally: + dill.dill._trace(False) # pylint: disable=protected-access + + +class SdkHarness(object): + + def __init__(self, control_channel): + self._control_channel = control_channel + self._data_channel_factory = GrpcClientDataChannelFactory() + + def run(self): + contol_stub = beam_fn_api_pb2.BeamFnControlStub(self._control_channel) + state_stub = beam_fn_api_pb2.SimpleBeamFnStateStub( + self._control_channel) + self.worker = SdkWorker(state_stub, self._data_channel_factory) + + responses = queue.Queue() + no_more_work = object() + def get_responses(): + while True: + response = responses.get() + if response is no_more_work: + return + yield response + + def process_requests(): + for work_request in contol_stub.Control(get_responses()): + logging.info('Got work %s', work_request.instruction_id) + try: + response = self.worker.do_instruction(work_request) + except Exception: # pylint: disable=broad-except + response = beam_fn_api_pb2.InstructionResponse( + instruction_id=work_request.instruction_id, + error=traceback.format_exc()) + responses.put(response) + t = threading.Thread(target=process_requests) + t.start() + t.join() + # get_responses may be blocked on responses.get(), but we need to return + # control to its caller. 
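+    # Pushing the sentinel unblocks responses.get() so that get_responses
+    # returns, completing the Control stream.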
+ responses.put(no_more_work) + self._data_channel_factory.close() + logging.info('Done consuming work.') + + +class SdkWorker(object): + + def __init__(self, state_handler, data_channel_factory): + self.fns = {} + self.state_handler = state_handler + self.data_channel_factory = data_channel_factory + + def do_instruction(self, request): + request_type = request.WhichOneof('request') + if request_type: + # E.g. if register is set, this will construct + # InstructionResponse(register=self.register(request.register)) + return beam_fn_api_pb2.InstructionResponse(**{ + 'instruction_id': request.instruction_id, + request_type: getattr(self, request_type) + (getattr(request, request_type), request.instruction_id) + }) + else: + raise NotImplementedError + + def register(self, request, unused_instruction_id=None): + for process_bundle_descriptor in request.process_bundle_descriptor: + self.fns[process_bundle_descriptor.id] = process_bundle_descriptor + for p_transform in list(process_bundle_descriptor.primitive_transform): + self.fns[p_transform.function_spec.id] = p_transform.function_spec + return beam_fn_api_pb2.RegisterResponse() + + def initial_source_split(self, request, unused_instruction_id=None): + source_spec = self.fns[request.source_reference] + assert source_spec.urn == PYTHON_SOURCE_URN + source_bundle = unpack_and_deserialize_py_fn( + self.fns[request.source_reference]) + splits = source_bundle.source.split(request.desired_bundle_size_bytes, + source_bundle.start_position, + source_bundle.stop_position) + response = beam_fn_api_pb2.InitialSourceSplitResponse() + response.splits.extend([ + beam_fn_api_pb2.SourceSplit( + source=serialize_and_pack_py_fn(split, PYTHON_SOURCE_URN), + relative_size=split.weight, + ) + for split in splits + ]) + return response + + def create_execution_tree(self, descriptor): + # TODO(vikasrk): Add an id field to Coder proto and use that instead. + coders = {coder.function_spec.id: operation_specs.get_coder_from_spec( + json.loads(unpack_function_spec_data(coder.function_spec))) + for coder in descriptor.coders} + + counter_factory = counters.CounterFactory() + # TODO(robertwb): Figure out the correct prefix to use for output counters + # from StateSampler. + state_sampler = statesampler.StateSampler( + 'fnapi-step%s-' % descriptor.id, counter_factory) + consumers = collections.defaultdict(lambda: collections.defaultdict(list)) + ops_by_id = {} + reversed_ops = [] + + for transform in reversed(descriptor.primitive_transform): + # TODO(robertwb): Figure out how to plumb through the operation name (e.g. + # "s3") from the service through the FnAPI so that msec counters can be + # reported and correctly plumbed through the service and the UI. 
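+      # The interim name used below is e.g. 'fnapis1' for the primitive
+      # transform with id '1' (illustrative).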
+ operation_name = 'fnapis%s' % transform.id + + def only_element(iterable): + element, = iterable + return element + + if transform.function_spec.urn == DATA_OUTPUT_URN: + target = beam_fn_api_pb2.Target( + primitive_transform_reference=transform.id, + name=only_element(transform.outputs.keys())) + + op = DataOutputOperation( + operation_name, + transform.step_name, + consumers[transform.id], + counter_factory, + state_sampler, + coders[only_element(transform.outputs.values()).coder_reference], + target, + self.data_channel_factory.create_data_channel( + transform.function_spec)) + + elif transform.function_spec.urn == DATA_INPUT_URN: + target = beam_fn_api_pb2.Target( + primitive_transform_reference=transform.id, + name=only_element(transform.inputs.keys())) + op = DataInputOperation( + operation_name, + transform.step_name, + consumers[transform.id], + counter_factory, + state_sampler, + coders[only_element(transform.outputs.values()).coder_reference], + target, + self.data_channel_factory.create_data_channel( + transform.function_spec)) + + elif transform.function_spec.urn == PYTHON_DOFN_URN: + def create_side_input(tag, si): + # TODO(robertwb): Extract windows (and keys) out of element data. + return operation_specs.WorkerSideInputSource( + tag=tag, + source=SideInputSource( + self.state_handler, + beam_fn_api_pb2.StateKey( + function_spec_reference=si.view_fn.id), + coder=unpack_and_deserialize_py_fn(si.view_fn))) + output_tags = list(transform.outputs.keys()) + spec = operation_specs.WorkerDoFn( + serialized_fn=unpack_function_spec_data(transform.function_spec), + output_tags=output_tags, + input=None, + side_inputs=[create_side_input(tag, si) + for tag, si in transform.side_inputs.items()], + output_coders=[coders[transform.outputs[out].coder_reference] + for out in output_tags]) + + op = operations.DoOperation(operation_name, spec, counter_factory, + state_sampler) + # TODO(robertwb): Move these to the constructor. + op.step_name = transform.step_name + for tag, op_consumers in consumers[transform.id].items(): + for consumer in op_consumers: + op.add_receiver( + consumer, output_tags.index(tag)) + + elif transform.function_spec.urn == IDENTITY_DOFN_URN: + op = operations.FlattenOperation(operation_name, None, counter_factory, + state_sampler) + # TODO(robertwb): Move these to the constructor. + op.step_name = transform.step_name + for tag, op_consumers in consumers[transform.id].items(): + for consumer in op_consumers: + op.add_receiver(consumer, 0) + + elif transform.function_spec.urn == PYTHON_SOURCE_URN: + source = load_compressed(unpack_function_spec_data( + transform.function_spec)) + # TODO(vikasrk): Remove this once custom source is implemented with + # splittable dofn via the data plane. + spec = operation_specs.WorkerRead( + iobase.SourceBundle(1.0, source, None, None), + [WindowedValueCoder(source.default_output_coder())]) + op = operations.ReadOperation(operation_name, spec, counter_factory, + state_sampler) + op.step_name = transform.step_name + output_tags = list(transform.outputs.keys()) + for tag, op_consumers in consumers[transform.id].items(): + for consumer in op_consumers: + op.add_receiver( + consumer, output_tags.index(tag)) + + else: + raise NotImplementedError + + # Record consumers. 
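+      # consumers maps producer transform id -> output name -> operations;
+      # e.g. consumers['2']['out'] == [op] says op consumes the 'out' output
+      # of primitive transform '2' (illustrative ids).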
+ for _, inputs in transform.inputs.items(): + for target in inputs.target: + consumers[target.primitive_transform_reference][target.name].append( + op) + + reversed_ops.append(op) + ops_by_id[transform.id] = op + + return list(reversed(reversed_ops)), ops_by_id + + def process_bundle(self, request, instruction_id): + ops, ops_by_id = self.create_execution_tree( + self.fns[request.process_bundle_descriptor_reference]) + + expected_inputs = [] + for _, op in ops_by_id.items(): + if isinstance(op, DataOutputOperation): + # TODO(robertwb): Is there a better way to pass the instruction id to + # the operation? + op.set_output_stream(op.data_channel.output_stream( + instruction_id, op.target)) + elif isinstance(op, DataInputOperation): + # We must wait until we receive "end of stream" for each of these ops. + expected_inputs.append(op) + + # Start all operations. + for op in reversed(ops): + logging.info('start %s', op) + op.start() + + # Inject inputs from data plane. + for input_op in expected_inputs: + for data in input_op.data_channel.input_elements( + instruction_id, [input_op.target]): + # ignores input name + target_op = ops_by_id[data.target.primitive_transform_reference] + # lacks coder for non-input ops + target_op.process_encoded(data.data) + + # Finish all operations. + for op in ops: + logging.info('finish %s', op) + op.finish() + + return beam_fn_api_pb2.ProcessBundleResponse() + + diff --git a/sdks/python/apache_beam/runners/sdk_harness_runner.py b/sdks/python/apache_beam/runners/sdk_harness_runner.py new file mode 100644 index 000000000000..6224c75a801a --- /dev/null +++ b/sdks/python/apache_beam/runners/sdk_harness_runner.py @@ -0,0 +1,457 @@ +"""A PipelineRunner using the SDK harness. +""" +import collections +import json +import logging +import Queue as queue +import threading + +import apache_beam as beam +from apache_beam.coders import WindowedValueCoder +from apache_beam.coders.coder_impl import create_InputStream +from apache_beam.coders.coder_impl import create_OutputStream +from apache_beam.internal import pickler +from apache_beam.io import iobase +from apache_beam.metrics.execution import MetricsEnvironment +from apache_beam.transforms.window import GlobalWindows +from concurrent import futures +from dataflow_worker import operation_specs +from dataflow_worker import worker_runner_base +from dataflow_worker.fn_harness import beam_fn_api_pb2 +from dataflow_worker.fn_harness import data_plane +from dataflow_worker.fn_harness import sdk_harness +import grpc +import portpicker + + +def streaming_rpc_handler(cls, method_name): + """Un-inverts the flow of control between the runner and the sdk harness.""" + + class StreamingRpcHandler(cls): + + _DONE = object() + + def __init__(self): + self._push_queue = queue.Queue() + self._pull_queue = queue.Queue() + setattr(self, method_name, self.run) + self._read_thread = threading.Thread(target=self._read) + + def run(self, iterator, context): + self._inputs = iterator + # Note: We only support one client for now. 
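+      # A single reader thread and one shared push/pull queue pair assume
+      # exactly one connected harness; a second concurrent stream would
+      # interleave its messages with the first.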
+ self._read_thread.start() + while True: + to_push = self._push_queue.get() + if to_push is self._DONE: + return + yield to_push + + def _read(self): + for data in self._inputs: + self._pull_queue.put(data) + + def push(self, item): + self._push_queue.put(item) + + def pull(self, timeout=None): + return self._pull_queue.get(timeout=timeout) + + def empty(self): + return self._pull_queue.empty() + + def done(self): + self.push(self._DONE) + self._read_thread.join() + + return StreamingRpcHandler() + + +class OldeSourceSplittableDoFn(beam.DoFn): + """A DoFn that reads and emits an entire source. + """ + + # TODO(robertwb): Make this a full SDF with progress splitting, etc. + def process(self, source): + if isinstance(source, iobase.SourceBundle): + for value in source.source.read(source.source.get_range_tracker( + source.start_position, source.stop_position)): + yield value + else: + # Dataflow native source + with source.reader() as reader: + for value in reader: + yield value + + +# See DataflowRunner._pardo_fn_data +OLDE_SOURCE_SPLITTABLE_DOFN_DATA = pickler.dumps( + (OldeSourceSplittableDoFn(), (), {}, [], + beam.transforms.core.Windowing(GlobalWindows()))) + + +class SdkHarnessRunner(worker_runner_base.WorkerRunnerBase): + + def __init__(self): + super(SdkHarnessRunner, self).__init__() + self._last_uid = -1 + + def _set_metrics_support(self): + MetricsEnvironment.set_metrics_supported(False) + + def _next_uid(self): + self._last_uid += 1 + return str(self._last_uid) + + def _map_task_registration(self, map_task, state_handler, + data_operation_spec): + input_data = {} + runner_sinks = {} + transforms = [] + transform_index_to_id = {} + + # Maps coders to new coder objects and references. + coders = {} + + def coder_id(coder): + if coder not in coders: + coders[coder] = beam_fn_api_pb2.Coder( + function_spec=sdk_harness.pack_function_spec_data( + json.dumps(coder.as_cloud_object()), + sdk_harness.PYTHON_CODER_URN, id=self._next_uid())) + + return coders[coder].function_spec.id + + def output_tags(op): + return getattr(op, 'output_tags', ['out']) + + def as_target(op_input): + input_op_index, input_output_index = op_input + input_op = map_task[input_op_index][1] + return { + 'ignored_input_tag': + beam_fn_api_pb2.Target.List(target=[ + beam_fn_api_pb2.Target( + primitive_transform_reference=transform_index_to_id[ + input_op_index], + name=output_tags(input_op)[input_output_index]) + ]) + } + + def outputs(op): + return { + tag: beam_fn_api_pb2.PCollection(coder_reference=coder_id(coder)) + for tag, coder in zip(output_tags(op), op.output_coders) + } + + for op_ix, (stage_name, operation) in enumerate(map_task): + transform_id = transform_index_to_id[op_ix] = self._next_uid() + if isinstance(operation, operation_specs.WorkerInMemoryWrite): + # Write this data back to the runner. + fn = beam_fn_api_pb2.FunctionSpec(urn=sdk_harness.DATA_OUTPUT_URN, + id=self._next_uid()) + if data_operation_spec: + fn.data.Pack(data_operation_spec) + inputs = as_target(operation.input) + side_inputs = {} + runner_sinks[(transform_id, 'out')] = operation + + elif isinstance(operation, operation_specs.WorkerRead): + # A Read is either translated to a direct injection of windowed values + # into the sdk worker, or an injection of the source object into the + # sdk worker as data followed by an SDF that reads that source. 
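+        # First branch: in-memory sources whose elements are already
+        # windowed values are encoded and injected directly. Otherwise the
+        # pickled source object is injected as a single element and expanded
+        # by OldeSourceSplittableDoFn (the else branch below).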
+ if (isinstance(operation.source.source, + worker_runner_base.InMemorySource) + and isinstance(operation.source.source.default_output_coder(), + WindowedValueCoder)): + output_stream = create_OutputStream() + element_coder = ( + operation.source.source.default_output_coder().get_impl()) + # Re-encode the elements in the nested context and + # concatenate them together + for element in operation.source.source.read(None): + element_coder.encode_to_stream(element, output_stream, True) + target_name = '%s' % self._next_uid() + input_data[(transform_id, target_name)] = output_stream.get() + fn = beam_fn_api_pb2.FunctionSpec(urn=sdk_harness.DATA_INPUT_URN, + id=self._next_uid()) + if data_operation_spec: + fn.data.Pack(data_operation_spec) + inputs = {target_name: beam_fn_api_pb2.Target.List()} + side_inputs = {} + else: + # Read the source object from the runner. + source_coder = beam.coders.DillCoder() + input_transform_id = self._next_uid() + output_stream = create_OutputStream() + source_coder.get_impl().encode_to_stream( + GlobalWindows.windowed_value(operation.source), + output_stream, + True) + target_name = '%s' % self._next_uid() + input_data[(input_transform_id, target_name)] = output_stream.get() + input_ptransform = beam_fn_api_pb2.PrimitiveTransform( + id=input_transform_id, + function_spec=beam_fn_api_pb2.FunctionSpec( + urn=sdk_harness.DATA_INPUT_URN, + id=self._next_uid()), + # TODO(robertwb): Possible name collision. + step_name=stage_name + '/inject_source', + inputs={target_name: beam_fn_api_pb2.Target.List()}, + outputs={ + 'out': + beam_fn_api_pb2.PCollection( + coder_reference=coder_id(source_coder)) + }) + if data_operation_spec: + input_ptransform.function_spec.data.Pack(data_operation_spec) + transforms.append(input_ptransform) + + # Read the elements out of the source. + fn = sdk_harness.pack_function_spec_data( + OLDE_SOURCE_SPLITTABLE_DOFN_DATA, + sdk_harness.PYTHON_DOFN_URN, + id=self._next_uid()) + inputs = { + 'ignored_input_tag': + beam_fn_api_pb2.Target.List(target=[ + beam_fn_api_pb2.Target( + primitive_transform_reference=input_transform_id, + name='out') + ]) + } + side_inputs = {} + + elif isinstance(operation, operation_specs.WorkerDoFn): + fn = sdk_harness.pack_function_spec_data( + operation.serialized_fn, + sdk_harness.PYTHON_DOFN_URN, + id=self._next_uid()) + inputs = as_target(operation.input) + # Store the contents of each side input for state access. + for si in operation.side_inputs: + assert isinstance(si.source, iobase.BoundedSource) + element_coder = si.source.default_output_coder() + view_id = self._next_uid() + # TODO(robertwb): Actually flesh out the ViewFn API. 
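+          # For now the "view fn" is just the pickled element coder; the
+          # harness's SideInputSource unpacks it to decode the raw state
+          # bytes appended below.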
+ side_inputs[si.tag] = beam_fn_api_pb2.SideInput( + view_fn=sdk_harness.serialize_and_pack_py_fn( + element_coder, urn=sdk_harness.PYTHON_ITERABLE_VIEWFN_URN, + id=view_id)) + # Re-encode the elements in the nested context and + # concatenate them together + output_stream = create_OutputStream() + for element in si.source.read( + si.source.get_range_tracker(None, None)): + element_coder.get_impl().encode_to_stream( + element, output_stream, True) + elements_data = output_stream.get() + state_key = beam_fn_api_pb2.StateKey(function_spec_reference=view_id) + state_handler.Clear(state_key) + state_handler.Append( + beam_fn_api_pb2.SimpleStateAppendRequest( + state_key=state_key, data=[elements_data])) + + elif isinstance(operation, operation_specs.WorkerFlatten): + fn = sdk_harness.pack_function_spec_data( + operation.serialized_fn, + sdk_harness.IDENTITY_DOFN_URN, + id=self._next_uid()) + inputs = { + 'ignored_input_tag': + beam_fn_api_pb2.Target.List(target=[ + beam_fn_api_pb2.Target( + primitive_transform_reference=transform_index_to_id[ + input_op_index], + name=output_tags(map_task[input_op_index][1])[ + input_output_index]) + for input_op_index, input_output_index in operation.inputs + ]) + } + side_inputs = {} + + else: + raise TypeError(operation) + + ptransform = beam_fn_api_pb2.PrimitiveTransform( + id=transform_id, + function_spec=fn, + step_name=stage_name, + inputs=inputs, + side_inputs=side_inputs, + outputs=outputs(operation)) + transforms.append(ptransform) + + process_bundle_descriptor = beam_fn_api_pb2.ProcessBundleDescriptor( + id=self._next_uid(), coders=coders.values(), + primitive_transform=transforms) + return beam_fn_api_pb2.InstructionRequest( + instruction_id=self._next_uid(), + register=beam_fn_api_pb2.RegisterRequest( + process_bundle_descriptor=[process_bundle_descriptor + ])), runner_sinks, input_data + + def _run_map_task( + self, map_task, control_handler, state_handler, data_plane_handler, + data_operation_spec): + registration, sinks, input_data = self._map_task_registration( + map_task, state_handler, data_operation_spec) + control_handler.push(registration) + process_bundle = beam_fn_api_pb2.InstructionRequest( + instruction_id=self._next_uid(), + process_bundle=beam_fn_api_pb2.ProcessBundleRequest( + process_bundle_descriptor_reference=registration.register. + process_bundle_descriptor[0].id)) + + for (transform_id, name), elements in input_data.items(): + data_out = data_plane_handler.output_stream( + process_bundle.instruction_id, beam_fn_api_pb2.Target( + primitive_transform_reference=transform_id, name=name)) + data_out.write(elements) + data_out.close() + + control_handler.push(process_bundle) + while True: + result = control_handler.pull() + if result.instruction_id == process_bundle.instruction_id: + if result.error: + raise RuntimeError(result.error) + expected_targets = [ + beam_fn_api_pb2.Target(primitive_transform_reference=transform_id, + name=output_name) + for (transform_id, output_name) in sinks.keys()] + for output in data_plane_handler.input_elements( + process_bundle.instruction_id, expected_targets): + target_tuple = ( + output.target.primitive_transform_reference, output.target.name) + if target_tuple not in sinks: + # Unconsumed output. 
+ continue + sink_op = sinks[target_tuple] + coder = sink_op.output_coders[0] + input_stream = create_InputStream(output.data) + elements = [] + while input_stream.size() > 0: + elements.append(coder.get_impl().decode_from_stream( + input_stream, True)) + if not sink_op.write_windowed_values: + elements = [e.value for e in elements] + for e in elements: + sink_op.output_buffer.append(e) + return + + def execute_map_tasks(self, ordered_map_tasks, direct=True): + if direct: + controller = SdkHarnessRunner.DirectController() + else: + controller = SdkHarnessRunner.GrpcController() + + try: + for _, map_task in ordered_map_tasks: + logging.info('Running %s', map_task) + self._run_map_task( + map_task, controller.control_handler, controller.state_handler, + controller.data_plane_handler, controller.data_operation_spec()) + finally: + controller.close() + + class SimpleState(beam_fn_api_pb2.SimpleBeamFnStateServicer): + + def __init__(self): + self._all = collections.defaultdict(list) + + def Get(self, state_key): + return beam_fn_api_pb2.Elements.Data( + data=''.join(self._all[self._to_key(state_key)])) + + def Append(self, append_request): + self._all[self._to_key(append_request.state_key)].extend( + append_request.data) + + def Clear(self, state_key): + try: + del self._all[self._to_key(state_key)] + except KeyError: + pass + + @staticmethod + def _to_key(state_key): + return (state_key.function_spec_reference, state_key.window, + state_key.key) + + class DirectController(object): + """An in-memory controller for fn API control, state and data planes.""" + + def __init__(self): + self._responses = [] + self.state_handler = SdkHarnessRunner.SimpleState() + self.control_handler = self + self.data_plane_handler = data_plane.InMemoryDataChannel() + self.worker = sdk_harness.SdkWorker( + self.state_handler, data_plane.InMemoryDataChannelFactory( + self.data_plane_handler.inverse())) + + def push(self, request): + logging.info('CONTROL REQUEST %s', request) + response = self.worker.do_instruction(request) + logging.info('CONTROL RESPONSE %s', response) + self._responses.append(response) + + def pull(self): + return self._responses.pop(0) + + def done(self): + pass + + def close(self): + pass + + def data_operation_spec(self): + return None + + class GrpcController(object): + """An grpc based controller for fn API control, state and data planes.""" + + def __init__(self): + self.state_handler = SdkHarnessRunner.SimpleState() + self.control_server = grpc.server( + futures.ThreadPoolExecutor(max_workers=10)) + self.control_port = portpicker.pick_unused_port() + self.control_server.add_insecure_port('[::]:%s' % self.control_port) + + self.data_server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + self.data_port = portpicker.pick_unused_port() + self.data_server.add_insecure_port('[::]:%s' % self.data_port) + + self.control_handler = streaming_rpc_handler( + beam_fn_api_pb2.BeamFnControlServicer, 'Control') + beam_fn_api_pb2.add_BeamFnControlServicer_to_server( + self.control_handler, self.control_server) + + self.data_plane_handler = data_plane.GrpcServerDataChannel() + beam_fn_api_pb2.add_BeamFnDataServicer_to_server( + self.data_plane_handler, self.data_server) + + logging.info('starting control server on port %s', self.control_port) + logging.info('starting data server on port %s', self.data_port) + self.data_server.start() + self.control_server.start() + + self.worker = sdk_harness.SdkHarness( + grpc.insecure_channel('localhost:%s' % self.control_port)) + self.worker_thread = 
threading.Thread(target=self.worker.run) + logging.info('starting worker') + self.worker_thread.start() + + def data_operation_spec(self): + url = 'localhost:%s' % self.data_port + remote_grpc_port = beam_fn_api_pb2.RemoteGrpcPort() + remote_grpc_port.api_service_descriptor.url = url + return remote_grpc_port + + def close(self): + self.control_handler.done() + self.worker_thread.join() + self.data_plane_handler.close() + self.control_server.stop(5).wait() + self.data_server.stop(5).wait() diff --git a/sdks/python/apache_beam/runners/sdk_harness_runner_test.py b/sdks/python/apache_beam/runners/sdk_harness_runner_test.py new file mode 100644 index 000000000000..9d700f0e87c3 --- /dev/null +++ b/sdks/python/apache_beam/runners/sdk_harness_runner_test.py @@ -0,0 +1,29 @@ +"""Tests for google.cloud.dataflow.worker.worker_runner.""" + +import logging +import unittest + +import google3 + +import apache_beam as beam +from dataflow_worker import worker_runner_base_test +from dataflow_worker.fn_harness import sdk_harness_runner +import faulthandler + + +class SdkHarnessRunnerTest(worker_runner_base_test.WorkerRunnerBaseTest): + + def create_pipeline(self): + return beam.Pipeline(runner=sdk_harness_runner.SdkHarnessRunner()) + + def test_combine_per_key(self): + # TODO(robertwb): Implement PGBKCV operation. + pass + + # Inherits all tests from worker_runner_base_test.WorkerRunnerBaseTest + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + faulthandler.enable() + unittest.main() diff --git a/sdks/python/apache_beam/runners/sdk_harness_test.py b/sdks/python/apache_beam/runners/sdk_harness_test.py new file mode 100644 index 000000000000..41453b93530c --- /dev/null +++ b/sdks/python/apache_beam/runners/sdk_harness_test.py @@ -0,0 +1,168 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for dataflow_worker.fn_harness.sdk_harness.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import logging +import unittest + +from apache_beam.io.concat_source_test import RangeSource +from apache_beam.io.iobase import SourceBundle +from concurrent import futures +from dataflow_worker.fn_harness import beam_fn_api_pb2 +from dataflow_worker.fn_harness import sdk_harness +from dataflow_worker.fn_harness.data_plane import GrpcClientDataChannelFactory +import grpc +import portpicker + + +class BeamFnControlServicer(beam_fn_api_pb2.BeamFnControlServicer): + + def __init__(self, requests, raise_errors=True): + self.requests = requests + self.instruction_ids = set(r.instruction_id for r in requests) + self.responses = {} + self.raise_errors = raise_errors + + def Control(self, response_iterator, context): + for request in self.requests: + logging.info("Sending request %s", request) + yield request + for response in response_iterator: + logging.info("Got response %s", response) + if response.instruction_id != -1: + assert response.instruction_id in self.instruction_ids + assert response.instruction_id not in self.responses + self.responses[response.instruction_id] = response + if self.raise_errors and response.error: + raise RuntimeError(response.error) + elif len(self.responses) == len(self.requests): + logging.info("All %s instructions finished.", len(self.requests)) + return + raise RuntimeError("Missing responses: %s" % + (self.instruction_ids - set(self.responses.keys()))) + + +class SdkHarnessTest(unittest.TestCase): + + def test_fn_registration(self): + + fns = [beam_fn_api_pb2.FunctionSpec(id=str(ix)) for ix in range(4)] + test_port = portpicker.pick_unused_port() + + process_bundle_descriptors = [beam_fn_api_pb2.ProcessBundleDescriptor( + id=str(100+ix), + primitive_transform=[ + beam_fn_api_pb2.PrimitiveTransform(function_spec=fn)]) + for ix, fn in enumerate(fns)] + + test_controller = BeamFnControlServicer([beam_fn_api_pb2.InstructionRequest( + register=beam_fn_api_pb2.RegisterRequest( + process_bundle_descriptor=process_bundle_descriptors))]) + + server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + beam_fn_api_pb2.add_BeamFnControlServicer_to_server(test_controller, server) + server.add_insecure_port("[::]:%s" % test_port) + server.start() + + channel = grpc.insecure_channel("localhost:%s" % test_port) + harness = sdk_harness.SdkHarness(channel) + harness.run() + self.assertEqual( + harness.worker.fns, + {item.id: item for item in fns + process_bundle_descriptors}) + + @unittest.skip("initial splitting not in proto") + def test_source_split(self): + source = RangeSource(0, 100) + expected_splits = list(source.split(30)) + + worker = sdk_harness.SdkWorker(None, GrpcClientDataChannelFactory()) + worker.register( + beam_fn_api_pb2.RegisterRequest( + process_bundle_descriptor=[beam_fn_api_pb2.ProcessBundleDescriptor( + primitive_transform=[beam_fn_api_pb2.PrimitiveTransform( + function_spec=sdk_harness.serialize_and_pack_py_fn( + SourceBundle(1.0, source, None, None), + sdk_harness.PYTHON_SOURCE_URN, + id="src"))])])) + split_response = worker.initial_source_split( + beam_fn_api_pb2.InitialSourceSplitRequest( + desired_bundle_size_bytes=30, + source_reference="src")) + + self.assertEqual( + expected_splits, + [sdk_harness.unpack_and_deserialize_py_fn(s.source) + for s in split_response.splits]) + + self.assertEqual( + [s.weight for s in expected_splits], + [s.relative_size for s in 
split_response.splits]) + + @unittest.skip("initial splitting not in proto") + def test_source_split_via_instruction(self): + + source = RangeSource(0, 100) + expected_splits = list(source.split(30)) + + test_port = portpicker.pick_unused_port() + + test_controller = BeamFnControlServicer([ + beam_fn_api_pb2.InstructionRequest( + instruction_id="register_request", + register=beam_fn_api_pb2.RegisterRequest( + process_bundle_descriptor=[ + beam_fn_api_pb2.ProcessBundleDescriptor( + primitive_transform=[beam_fn_api_pb2.PrimitiveTransform( + function_spec=sdk_harness.serialize_and_pack_py_fn( + SourceBundle(1.0, source, None, None), + sdk_harness.PYTHON_SOURCE_URN, + id="src"))])])), + beam_fn_api_pb2.InstructionRequest( + instruction_id="split_request", + initial_source_split=beam_fn_api_pb2.InitialSourceSplitRequest( + desired_bundle_size_bytes=30, + source_reference="src")) + ]) + + server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) + beam_fn_api_pb2.add_BeamFnControlServicer_to_server(test_controller, server) + server.add_insecure_port("[::]:%s" % test_port) + server.start() + + channel = grpc.insecure_channel("localhost:%s" % test_port) + harness = sdk_harness.SdkHarness(channel) + harness.run() + + split_response = test_controller.responses[ + "split_request"].initial_source_split + + self.assertEqual( + expected_splits, + [sdk_harness.unpack_and_deserialize_py_fn(s.source) + for s in split_response.splits]) + + self.assertEqual( + [s.weight for s in expected_splits], + [s.relative_size for s in split_response.splits]) + + +if __name__ == "__main__": + logging.getLogger().setLevel(logging.INFO) + unittest.main() diff --git a/sdks/python/apache_beam/runners/worker_runner_base.py b/sdks/python/apache_beam/runners/worker_runner_base.py new file mode 100644 index 000000000000..7e1306562441 --- /dev/null +++ b/sdks/python/apache_beam/runners/worker_runner_base.py @@ -0,0 +1,448 @@ +"""Beam runner for testing/profiling worker code directly. +""" +import collections +import logging +import time + +import google3 + +import apache_beam as beam +from apache_beam.internal import pickler +from apache_beam.io import iobase +from apache_beam.metrics.execution import MetricsEnvironment +from apache_beam.runners import DataflowRunner +from apache_beam.runners.dataflow.internal.dependency import _dependency_file_copy +from apache_beam.runners.dataflow.internal.names import PropertyNames +from apache_beam.runners.runner import PipelineResult +from apache_beam.runners.runner import PipelineRunner +from apache_beam.runners.runner import PipelineState +from apache_beam.typehints import typehints +from apache_beam.utils import pipeline_options +from apache_beam.utils import profiler +from apache_beam.utils.counters import CounterFactory +from dataflow_worker import operation_specs as maptask +from dataflow_worker import operations +from dataflow_worker import statesampler + + +class WorkerRunnerBase(PipelineRunner): + """Beam runner translating a pipeline into map tasks that are then executed. + + Primarily intended for testing and profiling the worker code paths. + """ + + def __init__(self): + self.executors = [] + + def _set_metrics_support(self): + """Set whether this runner supports metrics or not. + + Can be set to False by subclass runners. + """ + MetricsEnvironment.set_metrics_supported(True) + + def run(self, pipeline): + self._set_metrics_support() + # List of map tasks Each map task is a list of + # (stage_name, maptask.WorkerOperation) instructions. 
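+ # For illustration only (operation arguments elided), a simple
+ # Read-then-ParDo pipeline yields a single map task shaped like:
+ # [[('Read', WorkerRead(...)), ('Map', WorkerDoFn(...))]]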
+ self.map_tasks = []
+
+ # Map of pvalues to
+ # (map_task_index, producer_operation_index, producer_output_index)
+ self.outputs = {}
+
+ # Unique mappings of PCollections to strings.
+ self.side_input_labels = collections.defaultdict(
+ lambda: str(len(self.side_input_labels)))
+
+ # Mapping of map task indices to all map tasks that must precede them.
+ self.dependencies = collections.defaultdict(set)
+
+ # Visit the graph, building up the map_tasks and their metadata.
+ super(WorkerRunnerBase, self).run(pipeline)
+
+ # Now run the tasks in topological order.
+ def compute_depth_map(deps):
+ memoized = {}
+
+ def compute_depth(x):
+ if x not in memoized:
+ memoized[x] = 1 + max([-1] + [compute_depth(y) for y in deps[x]])
+ return memoized[x]
+
+ return {x: compute_depth(x) for x in deps.keys()}
+
+ map_task_depths = compute_depth_map(self.dependencies)
+ ordered_map_tasks = sorted((map_task_depths.get(ix, -1), map_task)
+ for ix, map_task in enumerate(self.map_tasks))
+
+ profile_options = pipeline.options.view_as(
+ pipeline_options.ProfilingOptions)
+ if profile_options.profile_cpu:
+ with profiler.Profile(
+ profile_id='worker-runner',
+ profile_location=profile_options.profile_location,
+ log_results=True, file_copy_fn=_dependency_file_copy):
+ self.execute_map_tasks(ordered_map_tasks)
+ else:
+ self.execute_map_tasks(ordered_map_tasks)
+
+ return WorkerRunnerResult(PipelineState.UNKNOWN)
+
+ def metrics_containers(self):
+ return [op.metrics_container
+ for ex in self.executors
+ for op in ex.operations()]
+
+ def execute_map_tasks(self, ordered_map_tasks):
+ tt = time.time()
+ for ix, (_, map_task) in enumerate(ordered_map_tasks):
+ logging.info('Running %s', map_task)
+ t = time.time()
+ stage_names, all_operations = zip(*map_task)
+ # TODO(robertwb): The DataflowRunner worker receives system step names
+ # (e.g. "s3") that are used to label the output msec counters. We use the
+ # operation names here, but this is not the same scheme used by the
+ # DataflowRunner; the result is that the output msec counters are named
+ # differently.
+ system_names = stage_names
+ # Create the CounterFactory and StateSampler for this MapTask.
+ # TODO(robertwb): Output counters produced here are currently ignored.
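+ # Note: each task below is executed synchronously by a
+ # SimpleMapTaskExecutor with its own CounterFactory and StateSampler;
+ # executors are retained so metrics_containers() can later collect
+ # their metric updates.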
+ counter_factory = CounterFactory()
+ state_sampler = statesampler.StateSampler('%s-' % ix, counter_factory)
+ map_executor = operations.SimpleMapTaskExecutor(
+ maptask.MapTask(
+ all_operations, 'S%02d' % ix, system_names, stage_names),
+ counter_factory,
+ state_sampler)
+ self.executors.append(map_executor)
+ map_executor.execute()
+ logging.info(
+ 'Stage %s finished: %0.3f sec', stage_names[0], time.time() - t)
+ logging.info('Total time: %0.3f sec', time.time() - tt)
+
+ def run_Read(self, transform_node):
+ self._run_read_from(transform_node, transform_node.transform.source)
+
+ def _run_read_from(self, transform_node, source):
+ """Used when this operation is the result of reading source."""
+ if not isinstance(source, iobase.NativeSource):
+ source = iobase.SourceBundle(1.0, source, None, None)
+ output = transform_node.outputs[None]
+ element_coder = self._get_coder(output)
+ read_op = maptask.WorkerRead(source, output_coders=[element_coder])
+ self.outputs[output] = len(self.map_tasks), 0, 0
+ self.map_tasks.append([(transform_node.full_label, read_op)])
+ return len(self.map_tasks) - 1
+
+ def run_ParDo(self, transform_node):
+ transform = transform_node.transform
+ output = transform_node.outputs[None]
+ element_coder = self._get_coder(output)
+ map_task_index, producer_index, output_index = self.outputs[
+ transform_node.inputs[0]]
+
+ # If any of this ParDo's side inputs depend on outputs from this map_task,
+ # we can't continue growing this map task.
+ def is_reachable(leaf, root):
+ if leaf == root:
+ return True
+ else:
+ return any(is_reachable(x, root) for x in self.dependencies[leaf])
+
+ if any(is_reachable(self.outputs[side_input.pvalue][0], map_task_index)
+ for side_input in transform_node.side_inputs):
+ # Start a new map task.
+ input_element_coder = self._get_coder(transform_node.inputs[0])
+
+ output_buffer = OutputBuffer(input_element_coder)
+
+ fusion_break_write = maptask.WorkerInMemoryWrite(
+ output_buffer=output_buffer,
+ write_windowed_values=True,
+ input=(producer_index, output_index),
+ output_coders=[input_element_coder])
+ self.map_tasks[map_task_index].append(
+ (transform_node.full_label + '/Write', fusion_break_write))
+
+ original_map_task_index = map_task_index
+ map_task_index, producer_index, output_index = len(self.map_tasks), 0, 0
+
+ fusion_break_read = maptask.WorkerRead(
+ output_buffer.source_bundle(),
+ output_coders=[input_element_coder])
+ self.map_tasks.append(
+ [(transform_node.full_label + '/Read', fusion_break_read)])
+
+ self.dependencies[map_task_index].add(original_map_task_index)
+
+ def create_side_read(side_input):
+ label = self.side_input_labels[side_input]
+ output_buffer = self.run_side_write(
+ side_input.pvalue, '%s/%s' % (transform_node.full_label, label))
+ return maptask.WorkerSideInputSource(output_buffer.source(), label)
+
+ do_op = maptask.WorkerDoFn( #
+ serialized_fn=pickler.dumps(DataflowRunner._pardo_fn_data(
+ transform_node,
+ lambda side_input: self.side_input_labels[side_input])),
+ output_tags=[PropertyNames.OUT] + ['%s_%s' % (PropertyNames.OUT, tag)
+ for tag in transform.output_tags
+ ],
+ # Same assumption that DataflowRunner has about coders being compatible
+ # across outputs.
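+ # Index 0 is the main output; tagged outputs follow in the order of
+ # transform.output_tags, matching the ix + 1 bookkeeping for
+ # self.outputs below.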
+ output_coders=[element_coder] * (len(transform.output_tags) + 1), + input=(producer_index, output_index), + side_inputs=[create_side_read(side_input) + for side_input in transform_node.side_inputs]) + + producer_index = len(self.map_tasks[map_task_index]) + self.outputs[transform_node.outputs[None]] = ( + map_task_index, producer_index, 0) + for ix, tag in enumerate(transform.output_tags): + self.outputs[transform_node.outputs[ + tag]] = map_task_index, producer_index, ix + 1 + self.map_tasks[map_task_index].append((transform_node.full_label, do_op)) + + for side_input in transform_node.side_inputs: + self.dependencies[map_task_index].add(self.outputs[side_input.pvalue][0]) + + def run_side_write(self, pcoll, label): + map_task_index, producer_index, output_index = self.outputs[pcoll] + + windowed_element_coder = self._get_coder(pcoll) + output_buffer = OutputBuffer(windowed_element_coder) + write_sideinput_op = maptask.WorkerInMemoryWrite( + output_buffer=output_buffer, + write_windowed_values=True, + input=(producer_index, output_index), + output_coders=[windowed_element_coder]) + self.map_tasks[map_task_index].append( + (label, write_sideinput_op)) + return output_buffer + + def run_GroupByKeyOnly(self, transform_node): + map_task_index, producer_index, output_index = self.outputs[ + transform_node.inputs[0]] + grouped_element_coder = self._get_coder(transform_node.outputs[None], + windowed=False) + windowed_ungrouped_element_coder = self._get_coder(transform_node.inputs[0]) + + output_buffer = GroupingOutputBuffer(grouped_element_coder) + shuffle_write = maptask.WorkerInMemoryWrite( + output_buffer=output_buffer, + write_windowed_values=False, + input=(producer_index, output_index), + output_coders=[windowed_ungrouped_element_coder]) + self.map_tasks[map_task_index].append( + (transform_node.full_label + '/Write', shuffle_write)) + + output_map_task_index = self._run_read_from( + transform_node, output_buffer.source()) + self.dependencies[output_map_task_index].add(map_task_index) + + def run_Flatten(self, transform_node): + output_buffer = OutputBuffer(self._get_coder(transform_node.outputs[None])) + output_map_task = self._run_read_from(transform_node, + output_buffer.source()) + + for input in transform_node.inputs: + map_task_index, producer_index, output_index = self.outputs[input] + element_coder = self._get_coder(input) + flatten_write = maptask.WorkerInMemoryWrite(output_buffer=output_buffer, + write_windowed_values=True, + input=(producer_index, + output_index), + output_coders=[element_coder]) + self.map_tasks[map_task_index].append( + (transform_node.full_label + '/Write', flatten_write)) + self.dependencies[output_map_task].add(map_task_index) + + def apply_CombinePerKey(self, transform, input): + # TODO(robertwb): Support side inputs. 
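+ # Worked example (illustrative, not part of this patch): with a
+ # MeanCombineFn, PartialGroupByKeyCombineValues maps ('a', 1) and
+ # ('a', 2) to (sum, count) accumulators, GroupByKey brings them
+ # together, MergeAccumulators folds them into (3, 2), and
+ # ExtractOutputs emits ('a', 1.5).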
+ assert not transform.args and not transform.kwargs + return (input + | PartialGroupByKeyCombineValues(transform.fn) + | beam.GroupByKey() + | MergeAccumulators(transform.fn) + | ExtractOutputs(transform.fn)) + + def run_PartialGroupByKeyCombineValues(self, transform_node): + element_coder = self._get_coder(transform_node.outputs[None]) + _, producer_index, output_index = self.outputs[transform_node.inputs[0]] + combine_op = maptask.WorkerPartialGroupByKey( + combine_fn=pickler.dumps((transform_node.transform.combine_fn, (), {}, + ())), + output_coders=[element_coder], + input=(producer_index, output_index)) + self._run_as_op(transform_node, combine_op) + + def run_MergeAccumulators(self, transform_node): + self._run_combine_transform(transform_node, 'merge') + + def run_ExtractOutputs(self, transform_node): + self._run_combine_transform(transform_node, 'extract') + + def _run_combine_transform(self, transform_node, phase): + transform = transform_node.transform + element_coder = self._get_coder(transform_node.outputs[None]) + _, producer_index, output_index = self.outputs[transform_node.inputs[0]] + combine_op = maptask.WorkerCombineFn(serialized_fn=pickler.dumps( + (transform.combine_fn, (), {}, ())), + phase=phase, + output_coders=[element_coder], + input=(producer_index, output_index)) + self._run_as_op(transform_node, combine_op) + + def _get_coder(self, pvalue, windowed=True): + # TODO(robertwb): This should be an attribute of the pvalue itself. + return DataflowRunner._get_coder( + pvalue.element_type or typehints.Any, + pvalue.windowing.windowfn.get_window_coder() if windowed else None) + + def _run_as_op(self, transform_node, op): + """Single-output operation in the same map task as its input.""" + map_task_index, _, _ = self.outputs[transform_node.inputs[0]] + op_index = len(self.map_tasks[map_task_index]) + output = transform_node.outputs[None] + self.outputs[output] = map_task_index, op_index, 0 + self.map_tasks[map_task_index].append((transform_node.full_label, op)) + + +class InMemorySource(iobase.BoundedSource): + """Source for reading an (as-yet unwritten) set of in-memory encoded elements. 
+ """ + + def __init__(self, encoded_elements, coder): + self._encoded_elements = encoded_elements + self._coder = coder + + def get_range_tracker(self, unused_start_position, unused_end_position): + return None + + def read(self, unused_range_tracker): + for encoded_element in self._encoded_elements: + yield self._coder.decode(encoded_element) + + def default_output_coder(self): + return self._coder + + +class OutputBuffer(object): + + def __init__(self, coder): + self.coder = coder + self.elements = [] + self.encoded_elements = [] + + def source(self): + return InMemorySource(self.encoded_elements, self.coder) + + def source_bundle(self): + return iobase.SourceBundle( + 1.0, InMemorySource(self.encoded_elements, self.coder), None, None) + + def __repr__(self): + return 'GroupingOutput[%r]' % len(self.elements) + + def append(self, value): + self.elements.append(value) + self.encoded_elements.append(self.coder.encode(value)) + + +class GroupingOutputBuffer(object): + + def __init__(self, grouped_coder): + self.grouped_coder = grouped_coder + self.elements = collections.defaultdict(list) + self.frozen = False + + def source(self): + return InMemorySource(self.encoded_elements, self.grouped_coder) + + def __repr__(self): + return 'GroupingOutputBuffer[%r]' % len(self.elements) + + def append(self, pair): + assert not self.frozen + k, v = pair + self.elements[k].append(v) + + def freeze(self): + if not self.frozen: + self._encoded_elements = [self.grouped_coder.encode(kv) + for kv in self.elements.iteritems()] + self.frozen = True + return self._encoded_elements + + @property + def encoded_elements(self): + return GroupedOutputBuffer(self) + + +class GroupedOutputBuffer(object): + + def __init__(self, buffer): + self.buffer = buffer + + def __getitem__(self, ix): + return self.buffer.freeze()[ix] + + def __iter__(self): + return iter(self.buffer.freeze()) + + def __len__(self): + return len(self.buffer.freeze()) + + def __nonzero__(self): + return True + + +class PartialGroupByKeyCombineValues(beam.PTransform): + + def __init__(self, combine_fn, native=True): + self.combine_fn = combine_fn + self.native = native + + def expand(self, input): + if self.native: + return beam.pvalue.PCollection(input.pipeline) + else: + def to_accumulator(v): + return self.combine_fn.add_input( + self.combine_fn.create_accumulator(), v) + return input | beam.Map(lambda (k, v): (k, to_accumulator(v))) + + +class MergeAccumulators(beam.PTransform): + + def __init__(self, combine_fn, native=True): + self.combine_fn = combine_fn + self.native = native + + def expand(self, input): + if self.native: + return beam.pvalue.PCollection(input.pipeline) + else: + merge_accumulators = self.combine_fn.merge_accumulators + return input | beam.Map(lambda (k, vs): (k, merge_accumulators(vs))) + + +class ExtractOutputs(beam.PTransform): + + def __init__(self, combine_fn, native=True): + self.combine_fn = combine_fn + self.native = native + + def expand(self, input): + if self.native: + return beam.pvalue.PCollection(input.pipeline) + else: + extract_output = self.combine_fn.extract_output + return input | beam.Map(lambda (k, v): (k, extract_output(v))) + + +class WorkerRunnerResult(PipelineResult): + + def wait_until_finish(self, duration=None): + pass diff --git a/sdks/python/apache_beam/runners/worker_runner_base_test.py b/sdks/python/apache_beam/runners/worker_runner_base_test.py new file mode 100644 index 000000000000..f45976c9002b --- /dev/null +++ b/sdks/python/apache_beam/runners/worker_runner_base_test.py @@ -0,0 +1,185 
@@ +"""Tests for google.cloud.dataflow.worker.worker_runner.""" + +import logging +import tempfile +import unittest + +import google3 + +import apache_beam as beam + +from apache_beam.metrics import Metrics +from apache_beam.metrics.execution import MetricKey +from apache_beam.metrics.execution import MetricsEnvironment +from apache_beam.metrics.metricbase import MetricName + +from apache_beam.pvalue import AsList +from apache_beam.transforms.util import assert_that +from apache_beam.transforms.util import BeamAssertException +from apache_beam.transforms.util import equal_to +from apache_beam.transforms.window import TimestampedValue +from dataflow_worker import worker_runner_base + + +class WorkerRunnerBaseTest(unittest.TestCase): + + def create_pipeline(self): + return beam.Pipeline(runner=worker_runner_base.WorkerRunnerBase()) + + def test_assert_that(self): + with self.assertRaises(BeamAssertException): + with self.create_pipeline() as p: + assert_that(p | beam.Create(['a', 'b']), equal_to(['a'])) + + def test_create(self): + with self.create_pipeline() as p: + assert_that(p | beam.Create(['a', 'b']), equal_to(['a', 'b'])) + + def test_pardo(self): + with self.create_pipeline() as p: + res = (p | beam.Create(['a', 'bc']) + | beam.Map(lambda e: e * 2) + | beam.Map(lambda e: e + 'x')) + assert_that(res, equal_to(['aax', 'bcbcx'])) + + def test_pardo_metrics(self): + + class MyDoFn(beam.DoFn): + + def start_bundle(self): + self.count = Metrics.counter(self.__class__, 'elements') + + def process(self, element): + self.count.inc(element) + return [element] + + class MyOtherDoFn(beam.DoFn): + + def start_bundle(self): + self.count = Metrics.counter(self.__class__, 'elementsplusone') + + def process(self, element): + self.count.inc(element + 1) + return [element] + + with self.create_pipeline() as p: + res = (p | beam.Create([1, 2, 3]) + | 'mydofn' >> beam.ParDo(MyDoFn()) + | 'myotherdofn' >> beam.ParDo(MyOtherDoFn())) + p.run() + if not MetricsEnvironment.METRICS_SUPPORTED: + self.skipTest('Metrics are not supported.') + + counter_updates = [{'key': key, 'value': val} + for container in p.runner.metrics_containers() + for key, val in + container.get_updates().counters.items()] + counter_values = [update['value'] for update in counter_updates] + counter_keys = [update['key'] for update in counter_updates] + assert_that(res, equal_to([1, 2, 3])) + self.assertEqual(counter_values, [6, 9]) + self.assertEqual(counter_keys, [ + MetricKey('mydofn', + MetricName(__name__ + '.MyDoFn', 'elements')), + MetricKey('myotherdofn', + MetricName(__name__ + '.MyOtherDoFn', 'elementsplusone'))]) + + def test_pardo_side_outputs(self): + def tee(elem, *tags): + for tag in tags: + if tag in elem: + yield beam.pvalue.OutputValue(tag, elem) + with self.create_pipeline() as p: + xy = (p | 'Create' >> beam.Create(['x', 'y', 'xy']) + | beam.FlatMap(tee, 'x', 'y').with_outputs()) + assert_that(xy.x, equal_to(['x', 'xy']), label='x') + assert_that(xy.y, equal_to(['y', 'xy']), label='y') + + def test_pardo_side_and_main_outputs(self): + def even_odd(elem): + yield elem + yield beam.pvalue.OutputValue('odd' if elem % 2 else 'even', elem) + with self.create_pipeline() as p: + ints = p | beam.Create([1, 2, 3]) + named = ints | 'named' >> beam.FlatMap( + even_odd).with_outputs('even', 'odd', main='all') + assert_that(named.all, equal_to([1, 2, 3]), label='named.all') + assert_that(named.even, equal_to([2]), label='named.even') + assert_that(named.odd, equal_to([1, 3]), label='named.odd') + + unnamed = ints | 'unnamed' >> 
beam.FlatMap(even_odd).with_outputs() + unnamed[None] | beam.Map(id) + assert_that(unnamed[None], equal_to([1, 2, 3]), label='unnamed.all') + assert_that(unnamed.even, equal_to([2]), label='unnamed.even') + assert_that(unnamed.odd, equal_to([1, 3]), label='unnamed.odd') + + def test_pardo_side_inputs(self): + def cross_product(elem, sides): + for side in sides: + yield elem, side + with self.create_pipeline() as p: + main = p | 'main' >> beam.Create(['a', 'b', 'c']) + side = p | 'side' >> beam.Create(['x', 'y']) + assert_that(main | beam.FlatMap(cross_product, AsList(side)), + equal_to([('a', 'x'), ('b', 'x'), ('c', 'x'), + ('a', 'y'), ('b', 'y'), ('c', 'y')])) + + def test_pardo_unfusable_side_inputs(self): + def cross_product(elem, sides): + for side in sides: + yield elem, side + with self.create_pipeline() as p: + pcoll = p | beam.Create(['a', 'b']) + assert_that(pcoll | beam.FlatMap(cross_product, AsList(pcoll)), + equal_to([('a', 'a'), ('a', 'b'), ('b', 'a'), ('b', 'b')])) + + with self.create_pipeline() as p: + pcoll = p | beam.Create(['a', 'b']) + derived = ((pcoll,) | beam.Flatten() + | beam.Map(lambda x: (x, x)) + | beam.GroupByKey() + | 'Unkey' >> beam.Map(lambda (x, _): x)) + assert_that( + pcoll | beam.FlatMap(cross_product, AsList(derived)), + equal_to([('a', 'a'), ('a', 'b'), ('b', 'a'), ('b', 'b')])) + + def test_group_by_key(self): + with self.create_pipeline() as p: + res = (p | beam.Create([('a', 1), ('a', 2), ('b', 3)]) + | beam.GroupByKey() + | beam.Map(lambda (k, vs): (k, sorted(vs)))) + assert_that(res, equal_to([('a', [1, 2]), ('b', [3])])) + + def test_flatten(self): + with self.create_pipeline() as p: + res = (p | 'a' >> beam.Create(['a']), + p | 'bc' >> beam.Create(['b', 'c']), + p | 'd' >> beam.Create(['d'])) | beam.Flatten() + assert_that(res, equal_to(['a', 'b', 'c', 'd'])) + + def test_combine_per_key(self): + with self.create_pipeline() as p: + res = (p | beam.Create([('a', 1), ('a', 2), ('b', 3)]) + | beam.CombinePerKey(beam.combiners.MeanCombineFn())) + assert_that(res, equal_to([('a', 1.5), ('b', 3.0)])) + + def test_read(self): + with tempfile.NamedTemporaryFile() as temp_file: + temp_file.write('a\nb\nc') + temp_file.flush() + with self.create_pipeline() as p: + assert_that(p | beam.io.ReadFromText(temp_file.name), + equal_to(['a', 'b', 'c'])) + + def test_windowing(self): + with self.create_pipeline() as p: + res = (p | beam.Create([1, 2, 100, 101, 102]) + | beam.Map(lambda t: TimestampedValue(('k', t), t)) + | beam.WindowInto(beam.transforms.window.Sessions(10)) + | beam.GroupByKey() + | beam.Map(lambda (k, vs): (k, sorted(vs)))) + assert_that(res, equal_to([('k', [1, 2]), ('k', [100, 101, 102])])) + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + unittest.main() From 264cba0d61f85fe5088b38281d7814ec48156bb9 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Thu, 20 Apr 2017 15:44:33 -0500 Subject: [PATCH 02/33] Add runner core files. 
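
Moves the runner implementations under runners/portability/
(fn_api_runner, maptask_executor_runner) and the worker-facing modules
under runners/worker/, and adds the logger, opcounters, operations,
sideinputs and statesampler modules listed in the diffstat below.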
--- sdks/python/apache_beam/runners/__init__.py | 30 + .../runners/portability/__init__.py | 0 .../fn_api_runner.py} | 0 .../fn_api_runner_test.py} | 0 .../maptask_executor_runner.py} | 0 .../maptask_executor_runner_test.py} | 0 .../apache_beam/runners/worker/__init__.py | 0 .../runners/{ => worker}/data_plane.py | 0 .../runners/{ => worker}/data_plane_test.py | 0 .../runners/{ => worker}/log_handler.py | 0 .../runners/{ => worker}/log_handler_test.py | 0 .../apache_beam/runners/worker/logger.pxd | 22 + .../apache_beam/runners/worker/logger.py | 169 +++++ .../apache_beam/runners/worker/logger_test.py | 185 +++++ .../apache_beam/runners/worker/opcounters.pxd | 42 ++ .../apache_beam/runners/worker/opcounters.py | 159 +++++ .../runners/worker/opcounters_test.py | 148 ++++ .../runners/worker/operation_specs.py | 360 ++++++++++ .../apache_beam/runners/worker/operations.pxd | 86 +++ .../apache_beam/runners/worker/operations.py | 644 ++++++++++++++++++ .../runners/{ => worker}/sdk_harness.py | 0 .../runners/{ => worker}/sdk_harness_test.py | 0 .../sdk_worker_main.py} | 0 .../apache_beam/runners/worker/sideinputs.py | 162 +++++ .../runners/worker/sideinputs_test.py | 149 ++++ .../runners/worker/statesampler.pyx | 235 +++++++ .../runners/worker/statesampler_test.py | 92 +++ 27 files changed, 2483 insertions(+) create mode 100644 sdks/python/apache_beam/runners/portability/__init__.py rename sdks/python/apache_beam/runners/{sdk_harness_runner.py => portability/fn_api_runner.py} (100%) rename sdks/python/apache_beam/runners/{sdk_harness_runner_test.py => portability/fn_api_runner_test.py} (100%) rename sdks/python/apache_beam/runners/{worker_runner_base.py => portability/maptask_executor_runner.py} (100%) rename sdks/python/apache_beam/runners/{worker_runner_base_test.py => portability/maptask_executor_runner_test.py} (100%) create mode 100644 sdks/python/apache_beam/runners/worker/__init__.py rename sdks/python/apache_beam/runners/{ => worker}/data_plane.py (100%) rename sdks/python/apache_beam/runners/{ => worker}/data_plane_test.py (100%) rename sdks/python/apache_beam/runners/{ => worker}/log_handler.py (100%) rename sdks/python/apache_beam/runners/{ => worker}/log_handler_test.py (100%) create mode 100644 sdks/python/apache_beam/runners/worker/logger.pxd create mode 100644 sdks/python/apache_beam/runners/worker/logger.py create mode 100644 sdks/python/apache_beam/runners/worker/logger_test.py create mode 100644 sdks/python/apache_beam/runners/worker/opcounters.pxd create mode 100644 sdks/python/apache_beam/runners/worker/opcounters.py create mode 100644 sdks/python/apache_beam/runners/worker/opcounters_test.py create mode 100644 sdks/python/apache_beam/runners/worker/operation_specs.py create mode 100644 sdks/python/apache_beam/runners/worker/operations.pxd create mode 100644 sdks/python/apache_beam/runners/worker/operations.py rename sdks/python/apache_beam/runners/{ => worker}/sdk_harness.py (100%) rename sdks/python/apache_beam/runners/{ => worker}/sdk_harness_test.py (100%) rename sdks/python/apache_beam/runners/{fn_harness.py => worker/sdk_worker_main.py} (100%) create mode 100644 sdks/python/apache_beam/runners/worker/sideinputs.py create mode 100644 sdks/python/apache_beam/runners/worker/sideinputs_test.py create mode 100644 sdks/python/apache_beam/runners/worker/statesampler.pyx create mode 100644 sdks/python/apache_beam/runners/worker/statesampler_test.py diff --git a/sdks/python/apache_beam/runners/__init__.py b/sdks/python/apache_beam/runners/__init__.py index 
e69de29bb2d1..2b93c3085948 100644 --- a/sdks/python/apache_beam/runners/__init__.py +++ b/sdks/python/apache_beam/runners/__init__.py @@ -0,0 +1,30 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +"""Runner objects execute a Pipeline. + +This package defines runners, which are used to execute a pipeline. +""" + +from apache_beam.runners.direct.direct_runner import DirectRunner +from apache_beam.runners.direct.direct_runner import EagerRunner +from apache_beam.runners.runner import PipelineRunner +from apache_beam.runners.runner import PipelineState +from apache_beam.runners.runner import create_runner + +from apache_beam.runners.dataflow.dataflow_runner import DataflowRunner +from apache_beam.runners.dataflow.test_dataflow_runner import TestDataflowRunner diff --git a/sdks/python/apache_beam/runners/portability/__init__.py b/sdks/python/apache_beam/runners/portability/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/sdks/python/apache_beam/runners/sdk_harness_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py similarity index 100% rename from sdks/python/apache_beam/runners/sdk_harness_runner.py rename to sdks/python/apache_beam/runners/portability/fn_api_runner.py diff --git a/sdks/python/apache_beam/runners/sdk_harness_runner_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py similarity index 100% rename from sdks/python/apache_beam/runners/sdk_harness_runner_test.py rename to sdks/python/apache_beam/runners/portability/fn_api_runner_test.py diff --git a/sdks/python/apache_beam/runners/worker_runner_base.py b/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py similarity index 100% rename from sdks/python/apache_beam/runners/worker_runner_base.py rename to sdks/python/apache_beam/runners/portability/maptask_executor_runner.py diff --git a/sdks/python/apache_beam/runners/worker_runner_base_test.py b/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py similarity index 100% rename from sdks/python/apache_beam/runners/worker_runner_base_test.py rename to sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py diff --git a/sdks/python/apache_beam/runners/worker/__init__.py b/sdks/python/apache_beam/runners/worker/__init__.py new file mode 100644 index 000000000000..e69de29bb2d1 diff --git a/sdks/python/apache_beam/runners/data_plane.py b/sdks/python/apache_beam/runners/worker/data_plane.py similarity index 100% rename from sdks/python/apache_beam/runners/data_plane.py rename to sdks/python/apache_beam/runners/worker/data_plane.py diff --git a/sdks/python/apache_beam/runners/data_plane_test.py b/sdks/python/apache_beam/runners/worker/data_plane_test.py similarity index 100% rename from 
sdks/python/apache_beam/runners/data_plane_test.py rename to sdks/python/apache_beam/runners/worker/data_plane_test.py diff --git a/sdks/python/apache_beam/runners/log_handler.py b/sdks/python/apache_beam/runners/worker/log_handler.py similarity index 100% rename from sdks/python/apache_beam/runners/log_handler.py rename to sdks/python/apache_beam/runners/worker/log_handler.py diff --git a/sdks/python/apache_beam/runners/log_handler_test.py b/sdks/python/apache_beam/runners/worker/log_handler_test.py similarity index 100% rename from sdks/python/apache_beam/runners/log_handler_test.py rename to sdks/python/apache_beam/runners/worker/log_handler_test.py diff --git a/sdks/python/apache_beam/runners/worker/logger.pxd b/sdks/python/apache_beam/runners/worker/logger.pxd new file mode 100644 index 000000000000..7667338cf9aa --- /dev/null +++ b/sdks/python/apache_beam/runners/worker/logger.pxd @@ -0,0 +1,22 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cimport cython + +from apache_beam.runners.common cimport LoggingContext + + +cdef class PerThreadLoggingContext(LoggingContext): + cdef kwargs + cdef list stack diff --git a/sdks/python/apache_beam/runners/worker/logger.py b/sdks/python/apache_beam/runners/worker/logger.py new file mode 100644 index 000000000000..0a18078a2c99 --- /dev/null +++ b/sdks/python/apache_beam/runners/worker/logger.py @@ -0,0 +1,169 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Python Dataflow worker logging.""" + +import json +import logging +import threading +import traceback + +from apache_beam.runners.common import LoggingContext + + +# Per-thread worker information. This is used only for logging to set +# context information that changes while work items get executed: +# work_item_id, step_name, stage_name. +class _PerThreadWorkerData(threading.local): + + def __init__(self): + super(_PerThreadWorkerData, self).__init__() + # TODO(robertwb): Consider starting with an initial (ignored) ~20 elements + # in the list, as going up and down all the way to zero incurs several + # reallocations. 
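+ # Each stack entry is the kwargs dict of one active
+ # PerThreadLoggingContext; get_data() below merges entries in push
+ # order, so inner contexts shadow outer ones for duplicate keys.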
+ self.stack = [] + + def get_data(self): + all_data = {} + for datum in self.stack: + all_data.update(datum) + return all_data + +per_thread_worker_data = _PerThreadWorkerData() + + +class PerThreadLoggingContext(LoggingContext): + """A context manager to add per thread attributes.""" + + def __init__(self, **kwargs): + self.kwargs = kwargs + self.stack = per_thread_worker_data.stack + + def __enter__(self): + self.enter() + + def enter(self): + self.stack.append(self.kwargs) + + def __exit__(self, exn_type, exn_value, exn_traceback): + self.exit() + + def exit(self): + self.stack.pop() + + +class JsonLogFormatter(logging.Formatter): + """A JSON formatter class as expected by the logging standard module.""" + + def __init__(self, job_id, worker_id): + super(JsonLogFormatter, self).__init__() + self.job_id = job_id + self.worker_id = worker_id + + def format(self, record): + """Returns a JSON string based on a LogRecord instance. + + Args: + record: A LogRecord instance. See below for details. + + Returns: + A JSON string representing the record. + + A LogRecord instance has the following attributes and is used for + formatting the final message. + + Attributes: + created: A double representing the timestamp for record creation + (e.g., 1438365207.624597). Note that the number contains also msecs and + microsecs information. Part of this is also available in the 'msecs' + attribute. + msecs: A double representing the msecs part of the record creation + (e.g., 624.5970726013184). + msg: Logging message containing formatting instructions or an arbitrary + object. This is the first argument of a log call. + args: A tuple containing the positional arguments for the logging call. + levelname: A string. Possible values are: INFO, WARNING, ERROR, etc. + exc_info: None or a 3-tuple with exception information as it is + returned by a call to sys.exc_info(). + name: Logger's name. Most logging is done using the default root logger + and therefore the name will be 'root'. + filename: Basename of the file where logging occurred. + funcName: Name of the function where logging occurred. + process: The PID of the process running the worker. + thread: An id for the thread where the record was logged. This is not a + real TID (the one provided by OS) but rather the id (address) of a + Python thread object. Nevertheless having this value can allow to + filter log statement from only one specific thread. + """ + output = {} + output['timestamp'] = { + 'seconds': int(record.created), + 'nanos': int(record.msecs * 1000000)} + # ERROR. INFO, DEBUG log levels translate into the same for severity + # property. WARNING becomes WARN. + output['severity'] = ( + record.levelname if record.levelname != 'WARNING' else 'WARN') + + # msg could be an arbitrary object, convert it to a string first. + record_msg = str(record.msg) + + # Prepare the actual message using the message formatting string and the + # positional arguments as they have been used in the log call. + if record.args: + try: + output['message'] = record_msg % record.args + except (TypeError, ValueError): + output['message'] = '%s with args (%s)' % (record_msg, record.args) + else: + output['message'] = record_msg + + # The thread ID is logged as a combination of the process ID and thread ID + # since workers can run in multiple processes. + output['thread'] = '%s:%s' % (record.process, record.thread) + # job ID and worker ID. These do not change during the lifetime of a worker. 
+ output['job'] = self.job_id + output['worker'] = self.worker_id + # Stage, step and work item ID come from thread local storage since they + # change with every new work item leased for execution. If there is no + # work item ID then we make sure the step is undefined too. + data = per_thread_worker_data.get_data() + if 'work_item_id' in data: + output['work'] = data['work_item_id'] + if 'stage_name' in data: + output['stage'] = data['stage_name'] + if 'step_name' in data: + output['step'] = data['step_name'] + # All logging happens using the root logger. We will add the basename of the + # file and the function name where the logging happened to make it easier + # to identify who generated the record. + output['logger'] = '%s:%s:%s' % ( + record.name, record.filename, record.funcName) + # Add exception information if any is available. + if record.exc_info: + output['exception'] = ''.join( + traceback.format_exception(*record.exc_info)) + + return json.dumps(output) + + +def initialize(job_id, worker_id, log_path): + """Initialize root logger so that we log JSON to a file and text to stdout.""" + + file_handler = logging.FileHandler(log_path) + file_handler.setFormatter(JsonLogFormatter(job_id, worker_id)) + logging.getLogger().addHandler(file_handler) + + # Set default level to INFO to avoid logging various DEBUG level log calls + # sprinkled throughout the code. + logging.getLogger().setLevel(logging.INFO) diff --git a/sdks/python/apache_beam/runners/worker/logger_test.py b/sdks/python/apache_beam/runners/worker/logger_test.py new file mode 100644 index 000000000000..657a1de35679 --- /dev/null +++ b/sdks/python/apache_beam/runners/worker/logger_test.py @@ -0,0 +1,185 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for worker logging utilities.""" + +import json +import logging +import sys +import threading +import unittest + +import google3 +from dataflow_worker import logger + + +class PerThreadLoggingContextTest(unittest.TestCase): + + def thread_check_attribute(self, name): + self.assertFalse(name in logger.per_thread_worker_data.get_data()) + with logger.PerThreadLoggingContext(**{name: 'thread-value'}): + self.assertEqual( + logger.per_thread_worker_data.get_data()[name], 'thread-value') + self.assertFalse(name in logger.per_thread_worker_data.get_data()) + + def test_no_positional_args(self): + with self.assertRaises(TypeError): + with logger.PerThreadLoggingContext('something'): + pass + + def test_per_thread_attribute(self): + self.assertFalse('xyz' in logger.per_thread_worker_data.get_data()) + with logger.PerThreadLoggingContext(xyz='value'): + self.assertEqual(logger.per_thread_worker_data.get_data()['xyz'], 'value') + thread = threading.Thread( + target=self.thread_check_attribute, args=('xyz',)) + thread.start() + thread.join() + self.assertEqual(logger.per_thread_worker_data.get_data()['xyz'], 'value') + self.assertFalse('xyz' in logger.per_thread_worker_data.get_data()) + + def test_set_when_undefined(self): + self.assertFalse('xyz' in logger.per_thread_worker_data.get_data()) + with logger.PerThreadLoggingContext(xyz='value'): + self.assertEqual(logger.per_thread_worker_data.get_data()['xyz'], 'value') + self.assertFalse('xyz' in logger.per_thread_worker_data.get_data()) + + def test_set_when_already_defined(self): + self.assertFalse('xyz' in logger.per_thread_worker_data.get_data()) + with logger.PerThreadLoggingContext(xyz='value'): + self.assertEqual(logger.per_thread_worker_data.get_data()['xyz'], 'value') + with logger.PerThreadLoggingContext(xyz='value2'): + self.assertEqual( + logger.per_thread_worker_data.get_data()['xyz'], 'value2') + self.assertEqual(logger.per_thread_worker_data.get_data()['xyz'], 'value') + self.assertFalse('xyz' in logger.per_thread_worker_data.get_data()) + + +class JsonLogFormatterTest(unittest.TestCase): + + SAMPLE_RECORD = { + 'created': 123456.789, 'msecs': 789.654321, + 'msg': '%s:%d:%.2f', 'args': ('xyz', 4, 3.14), + 'levelname': 'WARNING', + 'process': 'pid', 'thread': 'tid', + 'name': 'name', 'filename': 'file', 'funcName': 'func', + 'exc_info': None} + + SAMPLE_OUTPUT = { + 'timestamp': {'seconds': 123456, 'nanos': 789654321}, + 'severity': 'WARN', 'message': 'xyz:4:3.14', 'thread': 'pid:tid', + 'job': 'jobid', 'worker': 'workerid', 'logger': 'name:file:func'} + + def create_log_record(self, **kwargs): + + class Record(object): + + def __init__(self, **kwargs): + for k, v in kwargs.iteritems(): + setattr(self, k, v) + + return Record(**kwargs) + + def test_basic_record(self): + formatter = logger.JsonLogFormatter(job_id='jobid', worker_id='workerid') + record = self.create_log_record(**self.SAMPLE_RECORD) + self.assertEqual(json.loads(formatter.format(record)), self.SAMPLE_OUTPUT) + + def execute_multiple_cases(self, test_cases): + record = self.SAMPLE_RECORD + output = self.SAMPLE_OUTPUT + formatter = logger.JsonLogFormatter(job_id='jobid', worker_id='workerid') + + for case in test_cases: + record['msg'] = case['msg'] + record['args'] = case['args'] + output['message'] = case['expected'] + + self.assertEqual( + json.loads(formatter.format(self.create_log_record(**record))), + output) + + def test_record_with_format_character(self): + test_cases = [ + {'msg': '%A', 'args': (), 'expected': '%A'}, + {'msg': '%s', 'args': (), 
'expected': '%s'}, + {'msg': '%A%s', 'args': ('xy'), 'expected': '%A%s with args (xy)'}, + {'msg': '%s%s', 'args': (1), 'expected': '%s%s with args (1)'}, + ] + + self.execute_multiple_cases(test_cases) + + def test_record_with_arbitrary_messages(self): + test_cases = [ + {'msg': ImportError('abc'), 'args': (), 'expected': 'abc'}, + {'msg': TypeError('abc %s'), 'args': ('def'), 'expected': 'abc def'}, + ] + + self.execute_multiple_cases(test_cases) + + def test_record_with_per_thread_info(self): + with logger.PerThreadLoggingContext( + work_item_id='workitem', stage_name='stage', step_name='step'): + formatter = logger.JsonLogFormatter(job_id='jobid', worker_id='workerid') + record = self.create_log_record(**self.SAMPLE_RECORD) + log_output = json.loads(formatter.format(record)) + expected_output = dict(self.SAMPLE_OUTPUT) + expected_output.update( + {'work': 'workitem', 'stage': 'stage', 'step': 'step'}) + self.assertEqual(log_output, expected_output) + + def test_nested_with_per_thread_info(self): + formatter = logger.JsonLogFormatter(job_id='jobid', worker_id='workerid') + with logger.PerThreadLoggingContext( + work_item_id='workitem', stage_name='stage', step_name='step1'): + record = self.create_log_record(**self.SAMPLE_RECORD) + log_output1 = json.loads(formatter.format(record)) + + with logger.PerThreadLoggingContext(step_name='step2'): + record = self.create_log_record(**self.SAMPLE_RECORD) + log_output2 = json.loads(formatter.format(record)) + + record = self.create_log_record(**self.SAMPLE_RECORD) + log_output3 = json.loads(formatter.format(record)) + + record = self.create_log_record(**self.SAMPLE_RECORD) + log_output4 = json.loads(formatter.format(record)) + + self.assertEqual(log_output1, dict( + self.SAMPLE_OUTPUT, work='workitem', stage='stage', step='step1')) + self.assertEqual(log_output2, dict( + self.SAMPLE_OUTPUT, work='workitem', stage='stage', step='step2')) + self.assertEqual(log_output3, dict( + self.SAMPLE_OUTPUT, work='workitem', stage='stage', step='step1')) + self.assertEqual(log_output4, self.SAMPLE_OUTPUT) + + def test_exception_record(self): + formatter = logger.JsonLogFormatter(job_id='jobid', worker_id='workerid') + try: + raise ValueError('Something') + except ValueError: + attribs = dict(self.SAMPLE_RECORD) + attribs.update({'exc_info': sys.exc_info()}) + record = self.create_log_record(**attribs) + log_output = json.loads(formatter.format(record)) + # Check if exception type, its message, and stack trace information are in. + exn_output = log_output.pop('exception') + self.assertNotEqual(exn_output.find('ValueError: Something'), -1) + self.assertNotEqual(exn_output.find('logger_test.py'), -1) + self.assertEqual(log_output, self.SAMPLE_OUTPUT) + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + unittest.main() + diff --git a/sdks/python/apache_beam/runners/worker/opcounters.pxd b/sdks/python/apache_beam/runners/worker/opcounters.pxd new file mode 100644 index 000000000000..57382054d87b --- /dev/null +++ b/sdks/python/apache_beam/runners/worker/opcounters.pxd @@ -0,0 +1,42 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +cimport cython +cimport libc.stdint + +from apache_beam.utils.counters cimport Counter + + +cdef class SumAccumulator(object): + cdef libc.stdint.int64_t _value + cpdef update(self, libc.stdint.int64_t value) + cpdef libc.stdint.int64_t value(self) + + +cdef class OperationCounters(object): + cdef public _counter_factory + cdef public Counter element_counter + cdef public Counter mean_byte_counter + cdef public coder_impl + cdef public SumAccumulator active_accumulator + cdef public libc.stdint.int64_t _sample_counter + cdef public libc.stdint.int64_t _next_sample + + cpdef update_from(self, windowed_value) + cdef inline do_sample(self, windowed_value) + cpdef update_collect(self) + + cdef libc.stdint.int64_t _compute_next_sample(self, libc.stdint.int64_t i) + cdef inline bint _should_sample(self) + cpdef bint should_sample(self) diff --git a/sdks/python/apache_beam/runners/worker/opcounters.py b/sdks/python/apache_beam/runners/worker/opcounters.py new file mode 100644 index 000000000000..1b48b1306308 --- /dev/null +++ b/sdks/python/apache_beam/runners/worker/opcounters.py @@ -0,0 +1,159 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# cython: profile=True + +"""Counters collect the progress of the Worker for reporting to the service.""" + +from __future__ import absolute_import +import math +import random + +from apache_beam.utils.counters import Counter + + +class SumAccumulator(object): + """Accumulator for collecting byte counts.""" + + def __init__(self): + self._value = 0 + + def update(self, value): + self._value += value + + def value(self): + return self._value + + +class OperationCounters(object): + """The set of basic counters to attach to an Operation.""" + + def __init__(self, counter_factory, step_name, coder, output_index): + self._counter_factory = counter_factory + self.element_counter = counter_factory.get_counter( + '%s-out%d-ElementCount' % (step_name, output_index), Counter.SUM) + self.mean_byte_counter = counter_factory.get_counter( + '%s-out%d-MeanByteCount' % (step_name, output_index), Counter.MEAN) + self.coder_impl = coder.get_impl() + self.active_accumulator = None + self._sample_counter = 0 + self._next_sample = 0 + + def update_from(self, windowed_value): + """Add one value to this counter.""" + self.element_counter.update(1) + if self._should_sample(): + self.do_sample(windowed_value) + + def _observable_callback(self, inner_coder_impl, accumulator): + def _observable_callback_inner(value, is_encoded=False): + # TODO(ccy): If this stream is large, sample it as well. 
+ # To do this, we'll need to compute the average size of elements + # in this stream to add the *total* size of this stream to accumulator. + # We'll also want make sure we sample at least some of this stream + # (as self.should_sample() may be sampling very sparsely by now). + if is_encoded: + size = len(value) + accumulator.update(size) + else: + accumulator.update(inner_coder_impl.estimate_size(value)) + return _observable_callback_inner + + def do_sample(self, windowed_value): + size, observables = ( + self.coder_impl.get_estimated_size_and_observables(windowed_value)) + if not observables: + self.mean_byte_counter.update(size) + else: + self.active_accumulator = SumAccumulator() + self.active_accumulator.update(size) + for observable, inner_coder_impl in observables: + observable.register_observer( + self._observable_callback( + inner_coder_impl, self.active_accumulator)) + + def update_collect(self): + """Collects the accumulated size estimates. + + Now that the element has been processed, we ask our accumulator + for the total and store the result in a counter. + """ + if self.active_accumulator is not None: + self.mean_byte_counter.update(self.active_accumulator.value()) + self.active_accumulator = None + + def _compute_next_sample(self, i): + # https://en.wikipedia.org/wiki/Reservoir_sampling#Fast_Approximation + gap = math.log(1.0 - random.random()) / math.log(1.0 - 10.0/i) + return i + math.floor(gap) + + def _should_sample(self): + """Determines whether to sample the next element. + + Size calculation can be expensive, so we don't do it for each element. + Because we need only an estimate of average size, we sample. + + We always sample the first 10 elements, then the sampling rate + is approximately 10/N. After reading N elements, of the next N, + we will sample approximately 10*ln(2) (about 7) elements. + + This algorithm samples at the same rate as Reservoir Sampling, but + it never throws away early results. (Because we keep only a + running accumulation, storage is not a problem, so there is no + need to discard earlier calculations.) + + Because we accumulate and do not replace, our statistics are + biased toward early data. If the data are distributed uniformly, + this is not a problem. If the data change over time (i.e., the + element size tends to grow or shrink over time), our estimate will + show the bias. We could correct this by giving weight N to each + sample, since each sample is a stand-in for the N/(10*ln(2)) + samples around it, which is proportional to N. Since we do not + expect biased data, for efficiency we omit the extra multiplication. + We could reduce the early-data bias by putting a lower bound on + the sampling rate. + + Computing random.randint(1, self._sample_counter) for each element + is too slow, so when the sample size is big enough (we estimate 30 + is big enough), we estimate the size of the gap after each sample. + This estimation allows us to call random much less often. + + Returns: + True if it is time to compute another element's size. 
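+
+ For example (illustrative numbers): after the 1000th element the
+ sampling probability is roughly 10/1000, so the expected gap to the
+ next sample is about 100 elements, and the random number generator
+ is consulted once per gap rather than once per element.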
+    """
+
+    self._sample_counter += 1
+    if self._next_sample == 0:
+      if random.randint(1, self._sample_counter) <= 10:
+        if self._sample_counter > 30:
+          self._next_sample = self._compute_next_sample(self._sample_counter)
+        return True
+      return False
+    elif self._sample_counter >= self._next_sample:
+      self._next_sample = self._compute_next_sample(self._sample_counter)
+      return True
+    return False
+
+  def should_sample(self):
+    # We create this separate method because the above "_should_sample()"
+    # method is marked as inline in Cython and thus can't be exposed to
+    # Python code.
+    return self._should_sample()
+
+  def __iter__(self):
+    # Iterates over the counters owned by this object; __str__ and __repr__
+    # below enumerate them and would otherwise raise AttributeError.
+    yield self.element_counter
+    yield self.mean_byte_counter
+
+  def __str__(self):
+    return '<%s [%s]>' % (self.__class__.__name__,
+                          ', '.join([str(x) for x in self.__iter__()]))
+
+  def __repr__(self):
+    return '<%s %s at %s>' % (self.__class__.__name__,
+                              [x for x in self.__iter__()], hex(id(self)))
diff --git a/sdks/python/apache_beam/runners/worker/opcounters_test.py b/sdks/python/apache_beam/runners/worker/opcounters_test.py
new file mode 100644
index 000000000000..8f6aa7e36c4e
--- /dev/null
+++ b/sdks/python/apache_beam/runners/worker/opcounters_test.py
@@ -0,0 +1,148 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Tests for worker counters."""
+
+import logging
+import math
+import random
+import unittest
+
+from apache_beam import coders
+from apache_beam.runners.worker.opcounters import OperationCounters
+from apache_beam.transforms.window import GlobalWindows
+from apache_beam.utils.counters import CounterFactory
+
+
+# Classes to test that we can handle a variety of objects.
+# These have to be at top level so the pickler can find them.
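+# (The pickler locates classes by their module-level name, so a class defined
+# inside a test method could not be round-tripped through encoding.)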
+ + +class OldClassThatDoesNotImplementLen: # pylint: disable=old-style-class + + def __init__(self): + pass + + +class ObjectThatDoesNotImplementLen(object): + + def __init__(self): + pass + + +class OperationCountersTest(unittest.TestCase): + + def verify_counters(self, opcounts, expected_elements, expected_size=None): + self.assertEqual(expected_elements, opcounts.element_counter.value()) + if expected_size is not None: + if math.isnan(expected_size): + self.assertTrue(math.isnan(opcounts.mean_byte_counter.value())) + else: + self.assertEqual(expected_size, opcounts.mean_byte_counter.value()) + + def test_update_int(self): + opcounts = OperationCounters(CounterFactory(), 'some-name', + coders.PickleCoder(), 0) + self.verify_counters(opcounts, 0) + opcounts.update_from(GlobalWindows.windowed_value(1)) + self.verify_counters(opcounts, 1) + + def test_update_str(self): + coder = coders.PickleCoder() + opcounts = OperationCounters(CounterFactory(), 'some-name', + coder, 0) + self.verify_counters(opcounts, 0, float('nan')) + value = GlobalWindows.windowed_value('abcde') + opcounts.update_from(value) + estimated_size = coder.estimate_size(value) + self.verify_counters(opcounts, 1, estimated_size) + + def test_update_old_object(self): + coder = coders.PickleCoder() + opcounts = OperationCounters(CounterFactory(), 'some-name', + coder, 0) + self.verify_counters(opcounts, 0, float('nan')) + obj = OldClassThatDoesNotImplementLen() + value = GlobalWindows.windowed_value(obj) + opcounts.update_from(value) + estimated_size = coder.estimate_size(value) + self.verify_counters(opcounts, 1, estimated_size) + + def test_update_new_object(self): + coder = coders.PickleCoder() + opcounts = OperationCounters(CounterFactory(), 'some-name', + coder, 0) + self.verify_counters(opcounts, 0, float('nan')) + + obj = ObjectThatDoesNotImplementLen() + value = GlobalWindows.windowed_value(obj) + opcounts.update_from(value) + estimated_size = coder.estimate_size(value) + self.verify_counters(opcounts, 1, estimated_size) + + def test_update_multiple(self): + coder = coders.PickleCoder() + total_size = 0 + opcounts = OperationCounters(CounterFactory(), 'some-name', + coder, 0) + self.verify_counters(opcounts, 0, float('nan')) + value = GlobalWindows.windowed_value('abcde') + opcounts.update_from(value) + total_size += coder.estimate_size(value) + value = GlobalWindows.windowed_value('defghij') + opcounts.update_from(value) + total_size += coder.estimate_size(value) + self.verify_counters(opcounts, 2, float(total_size) / 2) + value = GlobalWindows.windowed_value('klmnop') + opcounts.update_from(value) + total_size += coder.estimate_size(value) + self.verify_counters(opcounts, 3, float(total_size) / 3) + + def test_should_sample(self): + # Order of magnitude more buckets than highest constant in code under test. + buckets = [0] * 300 + # The seed is arbitrary and exists just to ensure this test is robust. + # If you don't like this seed, try your own; the test should still pass. + random.seed(1717) + # Do enough runs that the expected hits even in the last buckets + # is big enough to expect some statistical smoothing. + total_runs = 10 * len(buckets) + + # Fill the buckets. + for _ in xrange(total_runs): + opcounts = OperationCounters(CounterFactory(), 'some-name', + coders.PickleCoder(), 0) + for i in xrange(len(buckets)): + if opcounts.should_sample(): + buckets[i] += 1 + + # Look at the buckets to see if they are likely. 
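+    # The first 10 elements are always sampled, so the first 10 buckets must
+    # equal total_runs exactly. Past that point the sampling rate decays like
+    # 10/i, so bucket i should hold roughly 10 * total_runs / i hits; the
+    # 7x and 14x bounds below leave room for statistical noise around that
+    # expectation.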
+    for i in xrange(10):
+      self.assertEqual(total_runs, buckets[i])
+    for i in xrange(10, len(buckets)):
+      self.assertTrue(buckets[i] > 7 * total_runs / i,
+                      'i=%d, buckets[i]=%d, expected=%d, ratio=%f' % (
+                          i, buckets[i],
+                          10 * total_runs / i,
+                          buckets[i] / (10.0 * total_runs / i)))
+      self.assertTrue(buckets[i] < 14 * total_runs / i,
+                      'i=%d, buckets[i]=%d, expected=%d, ratio=%f' % (
+                          i, buckets[i],
+                          10 * total_runs / i,
+                          buckets[i] / (10.0 * total_runs / i)))
+
+if __name__ == '__main__':
+  logging.getLogger().setLevel(logging.INFO)
+  unittest.main()
diff --git a/sdks/python/apache_beam/runners/worker/operation_specs.py b/sdks/python/apache_beam/runners/worker/operation_specs.py
new file mode 100644
index 000000000000..5b2908f964b2
--- /dev/null
+++ b/sdks/python/apache_beam/runners/worker/operation_specs.py
@@ -0,0 +1,360 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+"""Worker utilities for representing MapTasks.
+
+Each MapTask represents a sequence of ParallelInstruction(s): read from a
+source, write to a sink, parallel do, etc.
+"""
+
+import collections
+
+from apache_beam import coders
+
+
+def build_worker_instruction(*args):
+  """Create an object representing a ParallelInstruction protobuf.
+
+  This will be a collections.namedtuple with a custom __str__ method.
+
+  Alas, this wrapper is not known to pylint, which thinks it creates
+  constants. You may have to put a disable=invalid-name pylint
+  annotation on any use of this, depending on your names.
+
+  Args:
+    *args: first argument is the name of the type to create. Should
+      start with "Worker". Second argument is a list of the
+      attributes of this object.
+  Returns:
+    A new class, a subclass of tuple, that represents the protobuf.
+  """
+  tuple_class = collections.namedtuple(*args)
+  tuple_class.__str__ = worker_object_to_string
+  tuple_class.__repr__ = worker_object_to_string
+  return tuple_class
+
+
+def worker_printable_fields(workerproto):
+  """Returns the interesting fields of a Worker* object."""
+  return ['%s=%s' % (name, value)
+          # _asdict is the only way to list the fields; we cannot subclass
+          # this generated class.
+          # pylint: disable=protected-access
+          for name, value in workerproto._asdict().iteritems()
+          # want to output value 0 but not None nor []
+          if (value or value == 0)
+          and name not in
+          ('coder', 'coders', 'output_coders',
+           'elements',
+           'combine_fn', 'serialized_fn', 'window_fn',
+           'append_trailing_newlines', 'strip_trailing_newlines',
+           'compression_type', 'context',
+           'start_shuffle_position', 'end_shuffle_position',
+           'shuffle_reader_config', 'shuffle_writer_config')]
+
+
+def worker_object_to_string(worker_object):
+  """Returns a string compactly representing a Worker* object."""
+  return '%s(%s)' % (worker_object.__class__.__name__,
+                     ', '.join(worker_printable_fields(worker_object)))
+
+
+# All the following Worker* definitions will have these lint problems:
+# pylint: disable=invalid-name
+# pylint: disable=pointless-string-statement
+
+
+WorkerRead = build_worker_instruction(
+    'WorkerRead', ['source', 'output_coders'])
+"""Worker details needed to read from a source.
+
+Attributes:
+  source: a source object.
+  output_coders: 1-tuple of the coder for the output.
+"""
+
+
+WorkerSideInputSource = build_worker_instruction(
+    'WorkerSideInputSource', ['source', 'tag'])
+"""Worker details needed to read from a side input source.
+
+Attributes:
+  source: a source object.
+  tag: string tag for this side input.
+"""
+
+
+WorkerGroupingShuffleRead = build_worker_instruction(
+    'WorkerGroupingShuffleRead',
+    ['start_shuffle_position', 'end_shuffle_position',
+     'shuffle_reader_config', 'coder', 'output_coders'])
+"""Worker details needed to read from a grouping shuffle source.
+
+Attributes:
+  start_shuffle_position: An opaque string to be passed to the shuffle
+    source to indicate where to start reading.
+  end_shuffle_position: An opaque string to be passed to the shuffle
+    source to indicate where to stop reading.
+  shuffle_reader_config: An opaque string used to initialize the shuffle
+    reader. Contains things like connection endpoints for the shuffle
+    server appliance and various options.
+  coder: The KV coder used to decode shuffle entries.
+  output_coders: 1-tuple of the coder for the output.
+"""
+
+
+WorkerUngroupedShuffleRead = build_worker_instruction(
+    'WorkerUngroupedShuffleRead',
+    ['start_shuffle_position', 'end_shuffle_position',
+     'shuffle_reader_config', 'coder', 'output_coders'])
+"""Worker details needed to read from an ungrouped shuffle source.
+
+Attributes:
+  start_shuffle_position: An opaque string to be passed to the shuffle
+    source to indicate where to start reading.
+  end_shuffle_position: An opaque string to be passed to the shuffle
+    source to indicate where to stop reading.
+  shuffle_reader_config: An opaque string used to initialize the shuffle
+    reader. Contains things like connection endpoints for the shuffle
+    server appliance and various options.
+  coder: The value coder used to decode shuffle entries.
+  output_coders: 1-tuple of the coder for the output.
+"""
+
+
+WorkerWrite = build_worker_instruction(
+    'WorkerWrite', ['sink', 'input', 'output_coders'])
+"""Worker details needed to write to a sink.
+
+Attributes:
+  sink: a sink object.
+  input: A (producer index, output index) tuple representing the
+    ParallelInstruction operation whose output feeds into this operation.
+    The output index is 0 except for multi-output operations (like ParDo).
+  output_coders: 1-tuple, coder to use to estimate bytes written.
+"""
+
+
+WorkerInMemoryWrite = build_worker_instruction(
+    'WorkerInMemoryWrite',
+    ['output_buffer', 'write_windowed_values', 'input', 'output_coders'])
+"""Worker details needed to write to an in-memory sink.
+
+Used only for unit testing. It makes worker tests less cluttered with code like
+"write to a file and then check file contents".
+
+Attributes:
+  output_buffer: list to which output elements will be appended
+  write_windowed_values: whether to record the entire WindowedValue outputs,
+    or just the raw (unwindowed) value
+  input: A (producer index, output index) tuple representing the
+    ParallelInstruction operation whose output feeds into this operation.
+    The output index is 0 except for multi-output operations (like ParDo).
+  output_coders: 1-tuple, coder to use to estimate bytes written.
+"""
+
+
+WorkerShuffleWrite = build_worker_instruction(
+    'WorkerShuffleWrite',
+    ['shuffle_kind', 'shuffle_writer_config', 'input', 'output_coders'])
+"""Worker details needed to write to a shuffle sink.
+
+Attributes:
+  shuffle_kind: A string describing the shuffle kind. This can control the
+    way the worker interacts with the shuffle sink. The possible values are:
+    'ungrouped', 'group_keys', and 'group_keys_and_sort_values'.
+  shuffle_writer_config: An opaque string used to initialize the shuffle
+    write. Contains things like connection endpoints for the shuffle
+    server appliance and various options.
+  input: A (producer index, output index) tuple representing the
+    ParallelInstruction operation whose output feeds into this operation.
+    The output index is 0 except for multi-output operations (like ParDo).
+  output_coders: 1-tuple of the coder for input elements. If the
+    shuffle_kind is grouping, this is expected to be a KV coder.
+"""
+
+
+WorkerDoFn = build_worker_instruction(
+    'WorkerDoFn',
+    ['serialized_fn', 'output_tags', 'input', 'side_inputs', 'output_coders'])
+"""Worker details needed to run a DoFn.
+
+Attributes:
+  serialized_fn: A serialized DoFn object to be run for each input element.
+  output_tags: The string tags used to identify the outputs of a ParDo
+    operation. The tag is present even if the ParDo has just one output
+    (e.g., ['out']).
+  output_coders: array of coders, one for each output.
+  input: A (producer index, output index) tuple representing the
+    ParallelInstruction operation whose output feeds into this operation.
+    The output index is 0 except for multi-output operations (like ParDo).
+  side_inputs: A list of Worker...Read instances describing sources to be
+    used for getting values. The types supported right now are
+    WorkerInMemoryRead and WorkerTextRead.
+"""
+
+
+WorkerReifyTimestampAndWindows = build_worker_instruction(
+    'WorkerReifyTimestampAndWindows',
+    ['output_tags', 'input', 'output_coders'])
+"""Worker details needed to run a WindowInto.
+
+Attributes:
+  output_tags: The string tags used to identify the outputs of a ParDo
+    operation. The tag is present even if the ParDo has just one output
+    (e.g., ['out']).
+  output_coders: array of coders, one for each output.
+  input: A (producer index, output index) tuple representing the
+    ParallelInstruction operation whose output feeds into this operation.
+    The output index is 0 except for multi-output operations (like ParDo).
+"""
+
+
+WorkerMergeWindows = build_worker_instruction(
+    'WorkerMergeWindows',
+    ['window_fn', 'combine_fn', 'phase', 'output_tags', 'input', 'coders',
+     'context', 'output_coders'])
+"""Worker details needed to run a MergeWindows (a.k.a. GroupAlsoByWindows).
+
+Attributes:
+  window_fn: A serialized Windowing object representing the windowing strategy.
+  combine_fn: A serialized CombineFn object to be used after executing the
+    GroupAlsoByWindows operation. May be None if not a combining operation.
+  phase: Possible values are 'all', 'add', 'merge', and 'extract'.
+    The dataflow optimizer may split the user combiner into 3 separate
+    phases (ADD, MERGE, and EXTRACT), on separate VMs, as it sees
+    fit. The phase attribute dictates which DoFn is actually running in
+    the worker. May be None if not a combining operation.
+  output_tags: The string tags used to identify the outputs of a ParDo
+    operation. The tag is present even if the ParDo has just one output
+    (e.g., ['out']).
+  output_coders: array of coders, one for each output.
+  input: A (producer index, output index) tuple representing the
+    ParallelInstruction operation whose output feeds into this operation.
+    The output index is 0 except for multi-output operations (like ParDo).
+  coders: A 2-tuple of coders (key, value) to encode shuffle entries.
+  context: The ExecutionContext object for the current work item.
+"""
+
+
+WorkerCombineFn = build_worker_instruction(
+    'WorkerCombineFn',
+    ['serialized_fn', 'phase', 'input', 'output_coders'])
+"""Worker details needed to run a CombineFn.
+
+Attributes:
+  serialized_fn: A serialized CombineFn object to be used.
+  phase: Possible values are 'all', 'add', 'merge', and 'extract'.
+    The dataflow optimizer may split the user combiner into 3 separate
+    phases (ADD, MERGE, and EXTRACT), on separate VMs, as it sees
+    fit. The phase attribute dictates which DoFn is actually running in
+    the worker.
+  input: A (producer index, output index) tuple representing the
+    ParallelInstruction operation whose output feeds into this operation.
+    The output index is 0 except for multi-output operations (like ParDo).
+  output_coders: 1-tuple of the coder for the output.
+"""
+
+
+WorkerPartialGroupByKey = build_worker_instruction(
+    'WorkerPartialGroupByKey',
+    ['combine_fn', 'input', 'output_coders'])
+"""Worker details needed to run a partial group-by-key.
+
+Attributes:
+  combine_fn: A serialized CombineFn object to be used.
+  input: A (producer index, output index) tuple representing the
+    ParallelInstruction operation whose output feeds into this operation.
+    The output index is 0 except for multi-output operations (like ParDo).
+  output_coders: 1-tuple of the coder for the output.
+"""
+
+
+WorkerFlatten = build_worker_instruction(
+    'WorkerFlatten',
+    ['inputs', 'output_coders'])
+"""Worker details needed to run a Flatten.
+
+Attributes:
+  inputs: A list of tuples, each (producer index, output index), representing
+    the ParallelInstruction operations whose output feeds into this operation.
+    The output index is 0 unless the input is from a multi-output
+    operation (such as ParDo).
+  output_coders: 1-tuple of the coder for the output.
+"""
+
+
+def get_coder_from_spec(coder_spec):
+  """Return a coder instance from a coder spec.
+
+  Args:
+    coder_spec: A dict whose '@type' value is either a well-known 'kind:*'
+      string or a serialized Coder instance.
+
+  Returns:
+    A coder instance (has encode/decode methods).
+  """
+  assert coder_spec is not None
+
+  # Ignore the wrappers in these encodings.
+  # TODO(silviuc): Make sure with all the renamings that names below are ok.
+  ignored_wrappers = (
+      'com.google.cloud.dataflow.sdk.util.TimerOrElement$TimerOrElementCoder')
+  if coder_spec['@type'] in ignored_wrappers:
+    assert len(coder_spec['component_encodings']) == 1
+    coder_spec = coder_spec['component_encodings'][0]
+    return get_coder_from_spec(coder_spec)
+
+  # Handle a few well known types of coders.
+  if coder_spec['@type'] == 'kind:pair':
+    assert len(coder_spec['component_encodings']) == 2
+    component_coders = [
+        get_coder_from_spec(c) for c in coder_spec['component_encodings']]
+    return coders.TupleCoder(component_coders)
+  elif coder_spec['@type'] == 'kind:stream':
+    assert len(coder_spec['component_encodings']) == 1
+    return coders.IterableCoder(
+        get_coder_from_spec(coder_spec['component_encodings'][0]))
+  elif coder_spec['@type'] == 'kind:windowed_value':
+    assert len(coder_spec['component_encodings']) == 2
+    value_coder, window_coder = [
+        get_coder_from_spec(c) for c in coder_spec['component_encodings']]
+    return coders.WindowedValueCoder(value_coder, window_coder=window_coder)
+  elif coder_spec['@type'] == 'kind:global_window':
+    assert ('component_encodings' not in coder_spec
+            or len(coder_spec['component_encodings']) == 0)
+    return coders.GlobalWindowCoder()
+  elif coder_spec['@type'] == 'kind:length_prefix':
+    assert len(coder_spec['component_encodings']) == 1
+    return coders.LengthPrefixCoder(
+        get_coder_from_spec(coder_spec['component_encodings'][0]))
+
+  # We pass coders in the form "<coder_name>$<pickled_data>" to make the job
+  # description JSON more readable.
+  return coders.deserialize_coder(coder_spec['@type'])
+
+
+class MapTask(object):
+  """A map task decoded into operations and ready to be executed.
+
+  Attributes:
+    operations: A list of Worker* objects created by parsing the instructions
+      within the map task.
+    stage_name: The name of this map task execution stage.
+    system_names: The system names of the step corresponding to each map task
+      operation.
+    step_names: The names of the step corresponding to each map task operation.
+  """
+
+  def __init__(self, operations, stage_name, system_names, step_names):
+    self.operations = operations
+    self.stage_name = stage_name
+    self.system_names = system_names
+    self.step_names = step_names
+
+  def __str__(self):
+    return '<%s %s steps=%s>' % (self.__class__.__name__, self.stage_name,
+                                 '+'.join(self.step_names))
diff --git a/sdks/python/apache_beam/runners/worker/operations.pxd b/sdks/python/apache_beam/runners/worker/operations.pxd
new file mode 100644
index 000000000000..06cd062bad4d
--- /dev/null
+++ b/sdks/python/apache_beam/runners/worker/operations.pxd
@@ -0,0 +1,86 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+cimport cython
+
+from apache_beam.runners.common cimport Receiver
+from apache_beam.utils.windowed_value cimport WindowedValue
+from apache_beam.runners.worker cimport opcounters
+from apache_beam.metrics.execution cimport ScopedMetricsContainer
+
+
+cdef WindowedValue _globally_windowed_value
+cdef type _global_window_type
+
+cdef class ConsumerSet(Receiver):
+  cdef list consumers
+  cdef opcounters.OperationCounters opcounter
+  cdef public step_name
+  cdef public output_index
+  cdef public coder
+
+  cpdef receive(self, WindowedValue windowed_value)
+  cpdef update_counters_start(self, WindowedValue windowed_value)
+  cpdef update_counters_finish(self)
+
+
+cdef class Operation(object):
+  cdef readonly operation_name
+  cdef readonly spec
+  cdef object consumers
+  cdef readonly counter_factory
+  cdef public metrics_container
+  cdef public ScopedMetricsContainer scoped_metrics_container
+  # Public for access by Fn harness operations.
+  # TODO(robertwb): Cythonize FnHarness.
+  cdef public list receivers
+  cdef readonly bint debug_logging_enabled
+
+  cdef public step_name  # initialized lazily
+
+  cdef readonly object state_sampler
+
+  cdef readonly object scoped_start_state
+  cdef readonly object scoped_process_state
+  cdef readonly object scoped_finish_state
+
+  cpdef start(self)
+  cpdef process(self, WindowedValue windowed_value)
+  cpdef finish(self)
+
+  cpdef output(self, WindowedValue windowed_value, int output_index=*)
+
+cdef class ReadOperation(Operation):
+  @cython.locals(windowed_value=WindowedValue)
+  cpdef start(self)
+
+cdef class DoOperation(Operation):
+  cdef object dofn_runner
+  cdef Receiver dofn_receiver
+
+cdef class CombineOperation(Operation):
+  cdef object phased_combine_fn
+
+cdef class FlattenOperation(Operation):
+  pass
+
+cdef class PGBKCVOperation(Operation):
+  cdef public object combine_fn
+  cdef public object combine_fn_add_input
+  cdef dict table
+  cdef long max_keys
+  cdef long key_count
+
+  cpdef output_key(self, tuple wkey, value)
+
diff --git a/sdks/python/apache_beam/runners/worker/operations.py b/sdks/python/apache_beam/runners/worker/operations.py
new file mode 100644
index 000000000000..458b4bfafad6
--- /dev/null
+++ b/sdks/python/apache_beam/runners/worker/operations.py
@@ -0,0 +1,644 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the 'License');
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an 'AS IS' BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# cython: profile=True
+
+"""Worker operations executor."""
+
+import collections
+import itertools
+import logging
+
+from apache_beam import pvalue
+from apache_beam.internal import pickler
+from apache_beam.io import iobase
+from apache_beam.metrics.execution import MetricsContainer
+from apache_beam.metrics.execution import ScopedMetricsContainer
+from apache_beam.runners import common
+from apache_beam.runners.common import Receiver
+from apache_beam.runners.dataflow.internal.names import PropertyNames
+from apache_beam.transforms import combiners
+from apache_beam.transforms import core
+from apache_beam.transforms import sideinputs as apache_sideinputs
+from apache_beam.transforms.combiners import curry_combine_fn
+from apache_beam.transforms.combiners import PhasedCombineFnExecutor
+from apache_beam.transforms.window import GlobalWindows
+from apache_beam.utils.windowed_value import WindowedValue
+import cython
+from apache_beam.runners.worker import logger
+from apache_beam.runners.worker import opcounters
+from apache_beam.runners.worker import operation_specs
+from apache_beam.runners.worker import sideinputs
+
+_globally_windowed_value = GlobalWindows.windowed_value(None)
+_global_window_type = type(_globally_windowed_value.windows[0])
+
+
+class ConsumerSet(Receiver):
+  """A ConsumerSet represents a graph edge between two Operation nodes.
+
+  The ConsumerSet object collects information from the output of the
+  Operation at one end of its edge and the input of the Operation at
+  the other end.
+  ConsumerSets are attached to the outputting Operation.
+  """
+
+  def __init__(
+      self, counter_factory, step_name, output_index, consumers, coder):
+    self.consumers = consumers
+    self.opcounter = opcounters.OperationCounters(
+        counter_factory, step_name, coder, output_index)
+    # Used in repr.
+    self.step_name = step_name
+    self.output_index = output_index
+    self.coder = coder
+
+  def output(self, windowed_value):  # For old SDKs.
+    self.receive(windowed_value)
+
+  def receive(self, windowed_value):
+    self.update_counters_start(windowed_value)
+    for consumer in self.consumers:
+      cython.cast(Operation, consumer).process(windowed_value)
+    self.update_counters_finish()
+
+  def update_counters_start(self, windowed_value):
+    self.opcounter.update_from(windowed_value)
+
+  def update_counters_finish(self):
+    self.opcounter.update_collect()
+
+  def __repr__(self):
+    return '%s[%s.out%s, coder=%s, len(consumers)=%s]' % (
+        self.__class__.__name__, self.step_name, self.output_index, self.coder,
+        len(self.consumers))
+
+
+class Operation(object):
+  """An operation representing the live version of a work item specification.
+
+  An operation can have one or more outputs and for each output it can have
+  one or more receiver operations that will take that as input.
+  """
+
+  def __init__(self, operation_name, spec, counter_factory, state_sampler):
+    """Initializes a worker operation instance.
+
+    Args:
+      operation_name: The system name assigned by the Dataflow service for
+        this operation.
+      spec: An operation_specs.Worker* instance.
+      counter_factory: The CounterFactory to use for our counters.
+      state_sampler: The StateSampler for the current operation.
+ """ + self.operation_name = operation_name + self.spec = spec + self.counter_factory = counter_factory + self.consumers = collections.defaultdict(list) + + self.state_sampler = state_sampler + self.scoped_start_state = self.state_sampler.scoped_state( + self.operation_name + '-start') + self.scoped_process_state = self.state_sampler.scoped_state( + self.operation_name + '-process') + self.scoped_finish_state = self.state_sampler.scoped_state( + self.operation_name + '-finish') + # TODO(ccy): the '-abort' state can be added when the abort is supported in + # Operations. + + def start(self): + """Start operation.""" + self.debug_logging_enabled = logging.getLogger().isEnabledFor( + logging.DEBUG) + # Everything except WorkerSideInputSource, which is not a + # top-level operation, should have output_coders + if getattr(self.spec, 'output_coders', None): + self.receivers = [ConsumerSet(self.counter_factory, self.step_name, + i, self.consumers[i], coder) + for i, coder in enumerate(self.spec.output_coders)] + + def finish(self): + """Finish operation.""" + pass + + def process(self, o): + """Process element in operation.""" + pass + + def output(self, windowed_value, output_index=0): + cython.cast(Receiver, self.receivers[output_index]).receive(windowed_value) + + def add_receiver(self, operation, output_index=0): + """Adds a receiver operation for the specified output.""" + self.consumers[output_index].append(operation) + + def __str__(self): + """Generates a useful string for this object. + + Compactly displays interesting fields. In particular, pickled + fields are not displayed. Note that we collapse the fields of the + contained Worker* object into this object, since there is a 1-1 + mapping between Operation and operation_specs.Worker*. + + Returns: + Compact string representing this object. + """ + return self.str_internal() + + def str_internal(self, is_recursive=False): + """Internal helper for __str__ that supports recursion. + + When recursing on receivers, keep the output short. + Args: + is_recursive: whether to omit some details, particularly receivers. + Returns: + Compact string representing this object. + """ + printable_name = self.__class__.__name__ + if hasattr(self, 'step_name'): + printable_name += ' %s' % self.step_name + if is_recursive: + # If we have a step name, stop here, no more detail needed. 
+ return '<%s>' % printable_name + + if self.spec is None: + printable_fields = [] + else: + printable_fields = operation_specs.worker_printable_fields(self.spec) + + if not is_recursive and getattr(self, 'receivers', []): + printable_fields.append('receivers=[%s]' % ', '.join([ + str(receiver) for receiver in self.receivers])) + + return '<%s %s>' % (printable_name, ', '.join(printable_fields)) + + +class ReadOperation(Operation): + + def start(self): + with self.scoped_start_state: + super(ReadOperation, self).start() + range_tracker = self.spec.source.source.get_range_tracker( + self.spec.source.start_position, self.spec.source.stop_position) + for value in self.spec.source.source.read(range_tracker): + if isinstance(value, WindowedValue): + windowed_value = value + else: + windowed_value = _globally_windowed_value.with_value(value) + self.output(windowed_value) + + +class InMemoryWriteOperation(Operation): + """A write operation that will write to an in-memory sink.""" + + def process(self, o): + with self.scoped_process_state: + if self.debug_logging_enabled: + logging.debug('Processing [%s] in %s', o, self) + self.spec.output_buffer.append( + o if self.spec.write_windowed_values else o.value) + + +class _TaggedReceivers(dict): + + class NullReceiver(Receiver): + + def receive(self, element): + pass + + # For old SDKs. + def output(self, element): + pass + + def __missing__(self, unused_key): + if not getattr(self, '_null_receiver', None): + self._null_receiver = _TaggedReceivers.NullReceiver() + return self._null_receiver + + +class DoOperation(Operation): + """A Do operation that will execute a custom DoFn for each input element.""" + + def __init__(self, operation_name, spec, counter_factory, state_sampler): + super(DoOperation, self).__init__( + operation_name, spec, counter_factory, state_sampler) + + def _read_side_inputs(self, tags_and_types): + """Generator reading side inputs in the order prescribed by tags_and_types. + + Args: + tags_and_types: List of tuples (tag, type). Each side input has a string + tag that is specified in the worker instruction. The type is actually + a boolean which is True for singleton input (read just first value) + and False for collection input (read all values). + + Yields: + With each iteration it yields the result of reading an entire side source + either in singleton or collection mode according to the tags_and_types + argument. + """ + # We will read the side inputs in the order prescribed by the + # tags_and_types argument because this is exactly the order needed to + # replace the ArgumentPlaceholder objects in the args/kwargs of the DoFn + # getting the side inputs. + # + # Note that for each tag there could be several read operations in the + # specification. This can happen for instance if the source has been + # sharded into several files. + for side_tag, view_class, view_options in tags_and_types: + sources = [] + # Using the side_tag in the lambda below will trigger a pylint warning. + # However in this case it is fine because the lambda is used right away + # while the variable has the value assigned by the current iteration of + # the for loop. 
+ # pylint: disable=cell-var-from-loop + for si in itertools.ifilter( + lambda o: o.tag == side_tag, self.spec.side_inputs): + if not isinstance(si, operation_specs.WorkerSideInputSource): + raise NotImplementedError('Unknown side input type: %r' % si) + sources.append(si.source) + iterator_fn = sideinputs.get_iterator_fn_for_sources(sources) + + # Backwards compatibility for pre BEAM-733 SDKs. + if isinstance(view_options, tuple): + if view_class == pvalue.SingletonPCollectionView: + has_default, default = view_options + view_options = {'default': default} if has_default else {} + else: + view_options = {} + + yield apache_sideinputs.SideInputMap( + view_class, view_options, sideinputs.EmulatedIterable(iterator_fn)) + + def start(self): + with self.scoped_start_state: + super(DoOperation, self).start() + + # See fn_data in dataflow_runner.py + fn, args, kwargs, tags_and_types, window_fn = ( + pickler.loads(self.spec.serialized_fn)) + + state = common.DoFnState(self.counter_factory) + state.step_name = self.step_name + + # TODO(silviuc): What is the proper label here? PCollection being + # processed? + context = common.DoFnContext('label', state=state) + # Tag to output index map used to dispatch the side output values emitted + # by the DoFn function to the appropriate receivers. The main output is + # tagged with None and is associated with its corresponding index. + tagged_receivers = _TaggedReceivers() + + output_tag_prefix = PropertyNames.OUT + '_' + for index, tag in enumerate(self.spec.output_tags): + if tag == PropertyNames.OUT: + original_tag = None + elif tag.startswith(output_tag_prefix): + original_tag = tag[len(output_tag_prefix):] + else: + raise ValueError('Unexpected output name for operation: %s' % tag) + tagged_receivers[original_tag] = self.receivers[index] + + self.dofn_runner = common.DoFnRunner( + fn, args, kwargs, self._read_side_inputs(tags_and_types), + window_fn, context, tagged_receivers, + logger, self.step_name, + scoped_metrics_container=self.scoped_metrics_container) + self.dofn_receiver = (self.dofn_runner + if isinstance(self.dofn_runner, Receiver) + else DoFnRunnerReceiver(self.dofn_runner)) + + self.dofn_runner.start() + + def finish(self): + with self.scoped_finish_state: + self.dofn_runner.finish() + + def process(self, o): + with self.scoped_process_state: + self.dofn_receiver.receive(o) + + +class DoFnRunnerReceiver(Receiver): + + def __init__(self, dofn_runner): + self.dofn_runner = dofn_runner + + def receive(self, windowed_value): + self.dofn_runner.process(windowed_value) + + +class CombineOperation(Operation): + """A Combine operation executing a CombineFn for each input element.""" + + def __init__(self, operation_name, spec, counter_factory, state_sampler): + super(CombineOperation, self).__init__( + operation_name, spec, counter_factory, state_sampler) + # Combiners do not accept deferred side-inputs (the ignored fourth argument) + # and therefore the code to handle the extra args/kwargs is simpler than for + # the DoFn's of ParDo. 
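+    # The serialized payload has the same shape as the DoFn payload unpacked
+    # in DoOperation.start() above (fn, args, kwargs, ...); only the first
+    # three entries are needed here.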
+ fn, args, kwargs = pickler.loads(self.spec.serialized_fn)[:3] + self.phased_combine_fn = ( + PhasedCombineFnExecutor(self.spec.phase, fn, args, kwargs)) + + def finish(self): + logging.debug('Finishing %s', self) + + def process(self, o): + if self.debug_logging_enabled: + logging.debug('Processing [%s] in %s', o, self) + key, values = o.value + with self.scoped_metrics_container: + self.output( + o.with_value((key, self.phased_combine_fn.apply(values)))) + + +def create_pgbk_op(step_name, spec, counter_factory, state_sampler): + if spec.combine_fn: + return PGBKCVOperation(step_name, spec, counter_factory, state_sampler) + else: + return PGBKOperation(step_name, spec, counter_factory, state_sampler) + + +class PGBKOperation(Operation): + """Partial group-by-key operation. + + This takes (windowed) input (key, value) tuples and outputs + (key, [value]) tuples, performing a best effort group-by-key for + values in this bundle, memory permitting. + """ + + def __init__(self, operation_name, spec, counter_factory, state_sampler): + super(PGBKOperation, self).__init__( + operation_name, spec, counter_factory, state_sampler) + assert not self.spec.combine_fn + self.table = collections.defaultdict(list) + self.size = 0 + # TODO(robertwb) Make this configurable. + self.max_size = 10 * 1000 + + def process(self, o): + # TODO(robertwb): Structural (hashable) values. + key = o.value[0], tuple(o.windows) + self.table[key].append(o) + self.size += 1 + if self.size > self.max_size: + self.flush(9 * self.max_size // 10) + + def finish(self): + self.flush(0) + + def flush(self, target): + limit = self.size - target + for ix, (kw, vs) in enumerate(self.table.items()): + if ix >= limit: + break + del self.table[kw] + key, windows = kw + output_value = [v.value[1] for v in vs] + windowed_value = WindowedValue( + (key, output_value), + vs[0].timestamp, windows) + self.output(windowed_value) + + +class PGBKCVOperation(Operation): + + def __init__(self, operation_name, spec, counter_factory, state_sampler): + super(PGBKCVOperation, self).__init__( + operation_name, spec, counter_factory, state_sampler) + # Combiners do not accept deferred side-inputs (the ignored fourth + # argument) and therefore the code to handle the extra args/kwargs is + # simpler than for the DoFn's of ParDo. + fn, args, kwargs = pickler.loads(self.spec.combine_fn)[:3] + self.combine_fn = curry_combine_fn(fn, args, kwargs) + if (getattr(fn.add_input, 'im_func', None) + is core.CombineFn.add_input.im_func): + # Old versions of the SDK have CombineFns that don't implement add_input. + self.combine_fn_add_input = ( + lambda a, e: self.combine_fn.add_inputs(a, [e])) + else: + self.combine_fn_add_input = self.combine_fn.add_input + # Optimization for the (known tiny accumulator, often wide keyspace) + # combine functions. + # TODO(b/36567833): Bound by in-memory size rather than key count. + self.max_keys = ( + 1000 * 1000 if + isinstance(fn, (combiners.CountCombineFn, combiners.MeanCombineFn)) or + # TODO(b/36597732): Replace this 'or' part by adding the 'cy' optimized + # combiners to the short list above. + (isinstance(fn, core.CallableWrapperCombineFn) and + fn._fn in (min, max, sum)) else 100 * 1000) # pylint: disable=protected-access + self.key_count = 0 + self.table = {} + + def process(self, wkv): + key, value = wkv.value + # pylint: disable=unidiomatic-typecheck + # Optimization for the global window case. 
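+    # A value in the single global window gets the integer 0 as the windows
+    # part of its table key instead of a tuple of windows; output_key() below
+    # checks for this sentinel and re-wraps the result accordingly. This keeps
+    # the common-case keys small and cheap to hash.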
+ if len(wkv.windows) == 1 and type(wkv.windows[0]) is _global_window_type: + wkey = 0, key + else: + wkey = tuple(wkv.windows), key + entry = self.table.get(wkey, None) + if entry is None: + if self.key_count >= self.max_keys: + target = self.key_count * 9 // 10 + old_wkeys = [] + # TODO(robertwb): Use an LRU cache? + for old_wkey, old_wvalue in self.table.iteritems(): + old_wkeys.append(old_wkey) # Can't mutate while iterating. + self.output_key(old_wkey, old_wvalue[0]) + self.key_count -= 1 + if self.key_count <= target: + break + for old_wkey in reversed(old_wkeys): + del self.table[old_wkey] + self.key_count += 1 + # We save the accumulator as a one element list so we can efficiently + # mutate when new values are added without searching the cache again. + entry = self.table[wkey] = [self.combine_fn.create_accumulator()] + entry[0] = self.combine_fn_add_input(entry[0], value) + + def finish(self): + for wkey, value in self.table.iteritems(): + self.output_key(wkey, value[0]) + self.table = {} + self.key_count = 0 + + def output_key(self, wkey, value): + windows, key = wkey + if windows is 0: + self.output(_globally_windowed_value.with_value((key, value))) + else: + self.output(WindowedValue((key, value), windows[0].end, windows)) + + +class FlattenOperation(Operation): + """Flatten operation. + + Receives one or more producer operations, outputs just one list + with all the items. + """ + + def __init__(self, operation_name, spec, counter_factory, state_sampler): + super(FlattenOperation, self).__init__( + operation_name, spec, counter_factory, state_sampler) + + def process(self, o): + if self.debug_logging_enabled: + logging.debug('Processing [%s] in %s', o, self) + self.output(o) + + +def create_operation(operation_name, spec, counter_factory, step_name, + state_sampler, test_shuffle_source=None, + test_shuffle_sink=None, is_streaming=False): + """Create Operation object for given operation specification.""" + if isinstance(spec, operation_specs.WorkerRead): + if isinstance(spec.source, iobase.SourceBundle): + op = ReadOperation( + operation_name, spec, counter_factory, state_sampler) + else: + from dataflow_worker.native_operations import NativeReadOperation + op = NativeReadOperation( + operation_name, spec, counter_factory, state_sampler) + elif isinstance(spec, operation_specs.WorkerWrite): + from dataflow_worker.native_operations import NativeWriteOperation + op = NativeWriteOperation( + operation_name, spec, counter_factory, state_sampler) + elif isinstance(spec, operation_specs.WorkerCombineFn): + op = CombineOperation( + operation_name, spec, counter_factory, state_sampler) + elif isinstance(spec, operation_specs.WorkerPartialGroupByKey): + op = create_pgbk_op(operation_name, spec, counter_factory, state_sampler) + elif isinstance(spec, operation_specs.WorkerDoFn): + op = DoOperation(operation_name, spec, counter_factory, state_sampler) + elif isinstance(spec, operation_specs.WorkerGroupingShuffleRead): + from dataflow_worker.shuffle_operations import GroupedShuffleReadOperation + op = GroupedShuffleReadOperation( + operation_name, spec, counter_factory, state_sampler, + shuffle_source=test_shuffle_source) + elif isinstance(spec, operation_specs.WorkerUngroupedShuffleRead): + from dataflow_worker.shuffle_operations import UngroupedShuffleReadOperation + op = UngroupedShuffleReadOperation( + operation_name, spec, counter_factory, state_sampler, + shuffle_source=test_shuffle_source) + elif isinstance(spec, operation_specs.WorkerInMemoryWrite): + op = InMemoryWriteOperation( + 
operation_name, spec, counter_factory, state_sampler)
+  elif isinstance(spec, operation_specs.WorkerShuffleWrite):
+    from dataflow_worker.shuffle_operations import ShuffleWriteOperation
+    op = ShuffleWriteOperation(
+        operation_name, spec, counter_factory, state_sampler,
+        shuffle_sink=test_shuffle_sink)
+  elif isinstance(spec, operation_specs.WorkerFlatten):
+    op = FlattenOperation(
+        operation_name, spec, counter_factory, state_sampler)
+  elif isinstance(spec, operation_specs.WorkerMergeWindows):
+    from dataflow_worker.shuffle_operations import BatchGroupAlsoByWindowsOperation
+    from dataflow_worker.shuffle_operations import StreamingGroupAlsoByWindowsOperation
+    if is_streaming:
+      op = StreamingGroupAlsoByWindowsOperation(
+          operation_name, spec, counter_factory, state_sampler)
+    else:
+      op = BatchGroupAlsoByWindowsOperation(
+          operation_name, spec, counter_factory, state_sampler)
+  elif isinstance(spec, operation_specs.WorkerReifyTimestampAndWindows):
+    from dataflow_worker.shuffle_operations import ReifyTimestampAndWindowsOperation
+    op = ReifyTimestampAndWindowsOperation(
+        operation_name, spec, counter_factory, state_sampler)
+  else:
+    raise TypeError('Expected an instance of operation_specs.Worker* class '
+                    'instead of %s' % (spec,))
+  op.step_name = step_name
+  op.metrics_container = MetricsContainer(step_name)
+  op.scoped_metrics_container = ScopedMetricsContainer(op.metrics_container)
+  return op
+
+
+class SimpleMapTaskExecutor(object):
+  """An executor for map tasks.
+
+  Stores progress of the read operation that is the first operation of a map
+  task.
+  """
+
+  def __init__(
+      self, map_task, counter_factory, state_sampler,
+      test_shuffle_source=None, test_shuffle_sink=None):
+    """Initializes SimpleMapTaskExecutor.
+
+    Args:
+      map_task: The map task we are to run.
+      counter_factory: The CounterFactory instance for the work item.
+      state_sampler: The StateSampler tracking the execution step.
+      test_shuffle_source: Used during tests for dependency injection into
+        shuffle read operation objects.
+      test_shuffle_sink: Used during tests for dependency injection into
+        shuffle write operation objects.
+    """
+
+    self._map_task = map_task
+    self._counter_factory = counter_factory
+    self._ops = []
+    self._state_sampler = state_sampler
+    self._test_shuffle_source = test_shuffle_source
+    self._test_shuffle_sink = test_shuffle_sink
+
+  def operations(self):
+    return self._ops[:]
+
+  def execute(self):
+    """Executes all the operation_specs.Worker* instructions in a map task.
+
+    We update the map_task with the execution status, expressed as counters.
+
+    Raises:
+      RuntimeError: if we find more than one read instruction in task spec.
+      TypeError: if the spec parameter is not an instance of the recognized
+        operation_specs.Worker* classes.
+    """
+
+    # operations is a list of operation_specs.Worker* instances. The order of
+    # the elements is important because the inputs use list indexes as
+    # references.
+
+    step_names = (
+        self._map_task.step_names or [None] * len(self._map_task.operations))
+    for ix, spec in enumerate(self._map_task.operations):
+      # This is used for logging and assigning names to counters.
+      operation_name = self._map_task.system_names[ix]
+      step_name = step_names[ix]
+      op = create_operation(
+          operation_name, spec, self._counter_factory, step_name,
+          self._state_sampler,
+          test_shuffle_source=self._test_shuffle_source,
+          test_shuffle_sink=self._test_shuffle_sink)
+      self._ops.append(op)
+
+      # Add receiver operations to the appropriate producers.
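+      # op.spec.input (or each element of op.spec.inputs) is a
+      # (producer index, output index) pair; the producer index points into
+      # self._ops, which is why instruction order matters in the map task.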
+ if hasattr(op.spec, 'input'): + producer, output_index = op.spec.input + self._ops[producer].add_receiver(op, output_index) + # Flatten has 'inputs', not 'input' + if hasattr(op.spec, 'inputs'): + for producer, output_index in op.spec.inputs: + self._ops[producer].add_receiver(op, output_index) + + for ix, op in reversed(list(enumerate(self._ops))): + logging.debug('Starting op %d %s', ix, op) + with op.scoped_metrics_container: + op.start() + for op in self._ops: + with op.scoped_metrics_container: + op.finish() diff --git a/sdks/python/apache_beam/runners/sdk_harness.py b/sdks/python/apache_beam/runners/worker/sdk_harness.py similarity index 100% rename from sdks/python/apache_beam/runners/sdk_harness.py rename to sdks/python/apache_beam/runners/worker/sdk_harness.py diff --git a/sdks/python/apache_beam/runners/sdk_harness_test.py b/sdks/python/apache_beam/runners/worker/sdk_harness_test.py similarity index 100% rename from sdks/python/apache_beam/runners/sdk_harness_test.py rename to sdks/python/apache_beam/runners/worker/sdk_harness_test.py diff --git a/sdks/python/apache_beam/runners/fn_harness.py b/sdks/python/apache_beam/runners/worker/sdk_worker_main.py similarity index 100% rename from sdks/python/apache_beam/runners/fn_harness.py rename to sdks/python/apache_beam/runners/worker/sdk_worker_main.py diff --git a/sdks/python/apache_beam/runners/worker/sideinputs.py b/sdks/python/apache_beam/runners/worker/sideinputs.py new file mode 100644 index 000000000000..cc9e5e23898d --- /dev/null +++ b/sdks/python/apache_beam/runners/worker/sideinputs.py @@ -0,0 +1,162 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the 'License'); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an 'AS IS' BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for handling side inputs.""" + +import collections +import logging +import Queue +import threading +import traceback + +from apache_beam.io import iobase +from apache_beam.transforms import window + + +# Maximum number of reader threads for reading side input sources, per side +# input. +MAX_SOURCE_READER_THREADS = 15 + +# Number of slots for elements in side input element queue. Note that this +# value is intentionally smaller than MAX_SOURCE_READER_THREADS so as to reduce +# memory pressure of holding potentially-large elements in memory. Note that +# the number of pending elements in memory is equal to the sum of +# MAX_SOURCE_READER_THREADS and ELEMENT_QUEUE_SIZE. +ELEMENT_QUEUE_SIZE = 10 + +# Special element value sentinel for signaling reader state. +READER_THREAD_IS_DONE_SENTINEL = object() + +# Used to efficiently window the values of non-windowed side inputs. +_globally_windowed = window.GlobalWindows.windowed_value(None).with_value + + +class PrefetchingSourceSetIterable(object): + """Value iterator that reads concurrently from a set of sources.""" + + def __init__(self, sources, + max_reader_threads=MAX_SOURCE_READER_THREADS): + self.sources = sources + self.num_reader_threads = min(max_reader_threads, len(self.sources)) + + # Queue for sources that are to be read. 
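+    # Reader threads pull sources from this queue until it is empty;
+    # Queue.Empty in _reader_thread below is the normal termination signal.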
+ self.sources_queue = Queue.Queue() + for source in sources: + self.sources_queue.put(source) + # Queue for elements that have been read. + self.element_queue = Queue.Queue(ELEMENT_QUEUE_SIZE) + # Queue for exceptions encountered in reader threads; to be rethrown. + self.reader_exceptions = Queue.Queue() + # Whether we have already iterated; this iterable can only be used once. + self.already_iterated = False + # Whether an error was encountered in any source reader. + self.has_errored = False + + self.reader_threads = [] + self._start_reader_threads() + + def _start_reader_threads(self): + for _ in range(0, self.num_reader_threads): + t = threading.Thread(target=self._reader_thread) + t.daemon = True + t.start() + self.reader_threads.append(t) + + def _reader_thread(self): + try: + while True: + try: + source = self.sources_queue.get_nowait() + if isinstance(source, iobase.BoundedSource): + for value in source.read(source.get_range_tracker(None, None)): + if self.has_errored: + # If any reader has errored, just return. + return + if isinstance(value, window.WindowedValue): + self.element_queue.put(value) + else: + self.element_queue.put(_globally_windowed(value)) + else: + # Native dataflow source. + with source.reader() as reader: + returns_windowed_values = reader.returns_windowed_values + for value in reader: + if self.has_errored: + # If any reader has errored, just return. + return + if returns_windowed_values: + self.element_queue.put(value) + else: + self.element_queue.put(_globally_windowed(value)) + except Queue.Empty: + return + except Exception as e: # pylint: disable=broad-except + logging.error('Encountered exception in PrefetchingSourceSetIterable ' + 'reader thread: %s', traceback.format_exc()) + self.reader_exceptions.put(e) + self.has_errored = True + finally: + self.element_queue.put(READER_THREAD_IS_DONE_SENTINEL) + + def __iter__(self): + if self.already_iterated: + raise RuntimeError( + 'Can only iterate once over PrefetchingSourceSetIterable instance.') + self.already_iterated = True + + # The invariants during execution are: + # 1) A worker thread always posts the sentinel as the last thing it does + # before exiting. + # 2) We always wait for all sentinels and then join all threads. 
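+    # These invariants hold even when a reader errors out or the consumer
+    # abandons iteration early (GeneratorExit): the finally block below keeps
+    # draining sentinels before joining the reader threads.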
+ num_readers_finished = 0 + try: + while True: + element = self.element_queue.get() + if element is READER_THREAD_IS_DONE_SENTINEL: + num_readers_finished += 1 + if num_readers_finished == self.num_reader_threads: + return + elif self.has_errored: + raise self.reader_exceptions.get() + else: + yield element + except GeneratorExit: + self.has_errored = True + raise + finally: + while num_readers_finished < self.num_reader_threads: + element = self.element_queue.get() + if element is READER_THREAD_IS_DONE_SENTINEL: + num_readers_finished += 1 + for t in self.reader_threads: + t.join() + + +def get_iterator_fn_for_sources( + sources, max_reader_threads=MAX_SOURCE_READER_THREADS): + """Returns callable that returns iterator over elements for given sources.""" + def _inner(): + return iter(PrefetchingSourceSetIterable( + sources, max_reader_threads=max_reader_threads)) + return _inner + + +class EmulatedIterable(collections.Iterable): + """Emulates an iterable for a side input.""" + + def __init__(self, iterator_fn): + self.iterator_fn = iterator_fn + + def __iter__(self): + return self.iterator_fn() diff --git a/sdks/python/apache_beam/runners/worker/sideinputs_test.py b/sdks/python/apache_beam/runners/worker/sideinputs_test.py new file mode 100644 index 000000000000..8c54fe8b8c88 --- /dev/null +++ b/sdks/python/apache_beam/runners/worker/sideinputs_test.py @@ -0,0 +1,149 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+"""Tests for side input utilities."""
+
+import logging
+import time
+import unittest
+
+from apache_beam.runners.worker import sideinputs
+
+
+def strip_windows(iterator):
+  return [wv.value for wv in iterator]
+
+
+class FakeSource(object):
+
+  def __init__(self, items):
+    self.items = items
+
+  def reader(self):
+    return FakeSourceReader(self.items)
+
+
+class FakeSourceReader(object):
+
+  def __init__(self, items):
+    self.items = items
+    self.entered = False
+    self.exited = False
+
+  def __iter__(self):
+    return iter(self.items)
+
+  def __enter__(self):
+    self.entered = True
+    return self
+
+  def __exit__(self, exception_type, exception_value, traceback):
+    self.exited = True
+
+  @property
+  def returns_windowed_values(self):
+    return False
+
+
+class PrefetchingSourceIteratorTest(unittest.TestCase):
+
+  def test_single_source_iterator_fn(self):
+    sources = [
+        FakeSource([0, 1, 2, 3, 4, 5]),
+    ]
+    iterator_fn = sideinputs.get_iterator_fn_for_sources(
+        sources, max_reader_threads=2)
+    assert list(strip_windows(iterator_fn())) == range(6)
+
+  def test_multiple_sources_iterator_fn(self):
+    sources = [
+        FakeSource([0]),
+        FakeSource([1, 2, 3, 4, 5]),
+        FakeSource([]),
+        FakeSource([6, 7, 8, 9, 10]),
+    ]
+    iterator_fn = sideinputs.get_iterator_fn_for_sources(
+        sources, max_reader_threads=3)
+    assert sorted(strip_windows(iterator_fn())) == range(11)
+
+  def test_multiple_sources_single_reader_iterator_fn(self):
+    sources = [
+        FakeSource([0]),
+        FakeSource([1, 2, 3, 4, 5]),
+        FakeSource([]),
+        FakeSource([6, 7, 8, 9, 10]),
+    ]
+    iterator_fn = sideinputs.get_iterator_fn_for_sources(
+        sources, max_reader_threads=1)
+    assert list(strip_windows(iterator_fn())) == range(11)
+
+  def test_source_iterator_fn_exception(self):
+    class MyException(Exception):
+      pass
+
+    def exception_generator():
+      yield 0
+      time.sleep(0.1)
+      raise MyException('I am an exception!')
+
+    def perpetual_generator(value):
+      while True:
+        yield value
+
+    sources = [
+        FakeSource(perpetual_generator(1)),
+        FakeSource(perpetual_generator(2)),
+        FakeSource(perpetual_generator(3)),
+        FakeSource(perpetual_generator(4)),
+        FakeSource(exception_generator()),
+    ]
+    iterator_fn = sideinputs.get_iterator_fn_for_sources(sources)
+    seen = set()
+    with self.assertRaises(MyException):
+      for value in iterator_fn():
+        seen.add(value.value)
+    self.assertEqual(sorted(seen), range(5))
+
+
+class EmulatedCollectionsTest(unittest.TestCase):
+
+  def test_emulated_iterable(self):
+    def _iterable_fn():
+      for i in range(10):
+        yield i
+    iterable = sideinputs.EmulatedIterable(_iterable_fn)
+    # Check that multiple iterations are supported.
+    for _ in range(0, 5):
+      for i, j in enumerate(iterable):
+        self.assertEqual(i, j)
+
+  def test_large_iterable_values(self):
+    # Here, we create a large collection that would be too big for
+    # memory-constrained test environments, but should be under the memory
+    # limit if materialized one at a time.
+    def _iterable_fn():
+      for i in range(10):
+        yield ('%d' % i) * (200 * 1024 * 1024)
+    iterable = sideinputs.EmulatedIterable(_iterable_fn)
+    # Check that multiple iterations are supported.
+    for _ in range(0, 3):
+      for i, j in enumerate(iterable):
+        self.assertEqual(('%d' % i) * (200 * 1024 * 1024), j)
+
+
+if __name__ == '__main__':
+  logging.getLogger().setLevel(logging.INFO)
+  unittest.main()

diff --git a/sdks/python/apache_beam/runners/worker/statesampler.pyx b/sdks/python/apache_beam/runners/worker/statesampler.pyx
new file mode 100644
index 000000000000..6f176b07a39f
--- /dev/null
+++ b/sdks/python/apache_beam/runners/worker/statesampler.pyx
@@ -0,0 +1,235 @@
+# Copyright 2016 Google Inc. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# cython: profile=True
+
+"""State sampler for tracking time spent in execution steps.
+
+The state sampler profiles the time spent in each step of a Dataflow pipeline.
+Operations (defined in executor.py) which are executed as part of a MapTask are
+instrumented with context managers provided by StateSampler.scoped_state().
+These context managers change the internal state of the StateSampler during
+each relevant Operation's .start(), .process() and .finish() methods. State is
+sampled by a raw C thread, which does not hold the Python Global Interpreter
+Lock and queries the StateSampler's internal state at a defined sampling
+frequency. In a common example, a ReadOperation during its .start() method
+reads an element and calls a DoOperation's .process() method, which can call a
+WriteOperation's .process() method. Each element processed causes the current
+state to transition between these states of different Operations. Each time
+the sampling thread queries the current state, the time spent since the
+previous sample is attributed to that state and accumulated. Over time, this
+allows a granular runtime profile to be produced.
+"""
+
+import threading
+import time
+
+
+from apache_beam.utils.counters import Counter
+
+
+cimport cython
+from cpython cimport pythread
+from libc.stdint cimport int32_t, int64_t
+
+cdef extern from "Python.h":
+  # This typically requires the GIL, but we synchronize the list modifications
+  # we use this on via our own lock.
+  cdef void* PyList_GET_ITEM(list, Py_ssize_t index) nogil
+
+cdef extern from "unistd.h" nogil:
+  void usleep(long)
+
+cdef extern from "<time.h>" nogil:
+  struct timespec:
+    long tv_sec   # seconds
+    long tv_nsec  # nanoseconds
+  int clock_gettime(int clock_id, timespec *result)
+
+cdef inline int64_t get_nsec_time() nogil:
+  """Get current time as nanoseconds since Unix epoch."""
+  cdef timespec current_time
+  # First argument value of 0 corresponds to CLOCK_REALTIME.
+  clock_gettime(0, &current_time)
+  return (
+      (<int64_t> current_time.tv_sec) * 1000000000 +  # seconds to nanoseconds
+      current_time.tv_nsec)
+
+
+class StateSamplerInfo(object):
+  """Info for current state and transition statistics of StateSampler."""
+
+  def __init__(self, state_name, transition_count):
+    self.state_name = state_name
+    self.transition_count = transition_count
+
+  def __repr__(self):
+    return '<StateSamplerInfo %s %d>' % (
+        self.state_name, self.transition_count)
+
+
+# Default period for sampling current state of pipeline execution.
+DEFAULT_SAMPLING_PERIOD_MS = 200
+
+
+cdef class StateSampler(object):
+  """Tracks time spent in states during pipeline execution."""
+
+  cdef object prefix
+  cdef object counter_factory
+  cdef int sampling_period_ms
+
+  cdef dict scoped_states_by_name
+  cdef list scoped_states_by_index
+
+  cdef bint started
+  cdef bint finished
+  cdef object sampling_thread
+
+  # This lock guards members that are shared between threads, specifically
+  # finished, scoped_states_by_index, and the nsecs field of each state
+  # therein.
+  cdef pythread.PyThread_type_lock lock
+
+  cdef public int64_t state_transition_count
+
+  cdef int32_t current_state_index
+
+  def __init__(self, prefix, counter_factory,
+               sampling_period_ms=DEFAULT_SAMPLING_PERIOD_MS):
+
+    self.prefix = prefix
+    self.counter_factory = counter_factory
+    self.sampling_period_ms = sampling_period_ms
+
+    self.lock = pythread.PyThread_allocate_lock()
+    self.scoped_states_by_name = {}
+
+    self.current_state_index = 0
+    unknown_state = ScopedState(self, 'unknown', self.current_state_index)
+    pythread.PyThread_acquire_lock(self.lock, pythread.WAIT_LOCK)
+    self.scoped_states_by_index = [unknown_state]
+    self.finished = False
+    pythread.PyThread_release_lock(self.lock)
+
+    # Assert that the compiler correctly aligned the current_state field. This
+    # is necessary for reads and writes to this variable to be atomic across
+    # threads without additional synchronization.
+    # States are referenced via an index rather than, say, a pointer because
+    # of better support for 32-bit atomic reads and writes.
+    assert (<int64_t> &self.current_state_index) % sizeof(int32_t) == 0, (
+        'Address of StateSampler.current_state_index is not word-aligned.')
+
+  def __dealloc__(self):
+    pythread.PyThread_free_lock(self.lock)
+
+  def run(self):
+    cdef int64_t last_nsecs = get_nsec_time()
+    cdef int64_t elapsed_nsecs
+    with nogil:
+      while True:
+        usleep(self.sampling_period_ms * 1000)
+        pythread.PyThread_acquire_lock(self.lock, pythread.WAIT_LOCK)
+        try:
+          if self.finished:
+            break
+          elapsed_nsecs = get_nsec_time() - last_nsecs
+          # Take an address as we can't create a reference to the scope
+          # without the GIL.
+          nsecs_ptr = &(<ScopedState>PyList_GET_ITEM(
+              self.scoped_states_by_index, self.current_state_index)).nsecs
+          nsecs_ptr[0] += elapsed_nsecs
+          last_nsecs += elapsed_nsecs
+        finally:
+          pythread.PyThread_release_lock(self.lock)
+    self.end_time = time.time()
+
+  def start(self):
+    assert not self.started
+    self.started = True
+    self.sampling_thread = threading.Thread(target=self.run)
+    self.sampling_thread.start()
+
+  def stop(self):
+    assert not self.finished
+    pythread.PyThread_acquire_lock(self.lock, pythread.WAIT_LOCK)
+    self.finished = True
+    pythread.PyThread_release_lock(self.lock)
+    # May have to wait up to sampling_period_ms, but the platform-independent
+    # pythread doesn't support conditions.
+ self.sampling_thread.join() + + def stop_if_still_running(self): + if self.started and not self.finished: + self.stop() + + def get_info(self): + """Returns StateSamplerInfo with transition statistics.""" + return StateSamplerInfo( + self.scoped_states_by_index[self.current_state_index].name, + self.state_transition_count) + + def scoped_state(self, name): + """Returns a context manager managing transitions for a given state.""" + cdef ScopedState scoped_state = self.scoped_states_by_name.get(name, None) + if scoped_state is None: + output_counter = self.counter_factory.get_counter( + '%s%s-msecs' % (self.prefix, name), Counter.SUM) + new_state_index = len(self.scoped_states_by_index) + scoped_state = ScopedState(self, name, new_state_index, output_counter) + # Both scoped_states_by_index and scoped_state.nsecs are accessed + # by the sampling thread; initialize them under the lock. + pythread.PyThread_acquire_lock(self.lock, pythread.WAIT_LOCK) + self.scoped_states_by_index.append(scoped_state) + scoped_state.nsecs = 0 + pythread.PyThread_release_lock(self.lock) + self.scoped_states_by_name[name] = scoped_state + return scoped_state + + def commit_counters(self): + """Updates output counters with latest state statistics.""" + for state in self.scoped_states_by_name.values(): + state_msecs = int(1e-6 * state.nsecs) + state.counter.update(state_msecs - state.counter.value()) + + +cdef class ScopedState(object): + """Context manager class managing transitions for a given sampler state.""" + + cdef readonly StateSampler sampler + cdef readonly int32_t state_index + cdef readonly object counter + cdef readonly object name + cdef readonly int64_t nsecs + cdef int32_t old_state_index + + def __init__(self, sampler, name, state_index, counter=None): + self.sampler = sampler + self.name = name + self.state_index = state_index + self.counter = counter + + cpdef __enter__(self): + self.old_state_index = self.sampler.current_state_index + pythread.PyThread_acquire_lock(self.sampler.lock, pythread.WAIT_LOCK) + self.sampler.current_state_index = self.state_index + pythread.PyThread_release_lock(self.sampler.lock) + self.sampler.state_transition_count += 1 + + cpdef __exit__(self, unused_exc_type, unused_exc_value, unused_traceback): + pythread.PyThread_acquire_lock(self.sampler.lock, pythread.WAIT_LOCK) + self.sampler.current_state_index = self.old_state_index + pythread.PyThread_release_lock(self.sampler.lock) + self.sampler.state_transition_count += 1 + + def __repr__(self): + return "ScopedState[%s, %s, %s]" % (self.name, self.state_index, self.nsecs) diff --git a/sdks/python/apache_beam/runners/worker/statesampler_test.py b/sdks/python/apache_beam/runners/worker/statesampler_test.py new file mode 100644 index 000000000000..1e9224a3a1e6 --- /dev/null +++ b/sdks/python/apache_beam/runners/worker/statesampler_test.py @@ -0,0 +1,92 @@ +# Copyright 2016 Google Inc. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
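Before the tests, a compact sketch of the sampler lifecycle. Per scoped_state() above, time spent inside a scope accrues to a counter named '%s%s-msecs' % (prefix, state_name); the import path used here is the in-tree one the series converges on:

    from apache_beam.runners.worker import statesampler
    from apache_beam.utils.counters import CounterFactory

    counter_factory = CounterFactory()
    sampler = statesampler.StateSampler('stage1-', counter_factory)
    sampler.start()
    with sampler.scoped_state('process'):
      pass  # Time spent in this block accrues to 'stage1-process-msecs'.
    sampler.stop()
    sampler.commit_counters()
    for counter in counter_factory.get_counters():
      print('%s = %s' % (counter.name, counter.value()))
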
+
+"""Tests for state sampler."""
+
+import logging
+import time
+import unittest
+
+import google3
+
+from apache_beam.utils.counters import CounterFactory
+from dataflow_worker import statesampler
+
+
+class StateSamplerTest(unittest.TestCase):
+
+  def test_basic_sampler(self):
+    # Set up state sampler.
+    counter_factory = CounterFactory()
+    sampler = statesampler.StateSampler('basic-', counter_factory,
+                                        sampling_period_ms=1)
+
+    # Run basic workload transitioning between 3 states.
+    sampler.start()
+    with sampler.scoped_state('statea'):
+      time.sleep(0.1)
+      with sampler.scoped_state('stateb'):
+        time.sleep(0.2 / 2)
+        with sampler.scoped_state('statec'):
+          time.sleep(0.3)
+        time.sleep(0.2 / 2)
+    sampler.stop()
+    sampler.commit_counters()
+
+    # Test that sampled state timings are within 25% of expected values.
+    expected_counter_values = {
+        'basic-statea-msecs': 100,
+        'basic-stateb-msecs': 200,
+        'basic-statec-msecs': 300,
+    }
+    for counter in counter_factory.get_counters():
+      self.assertIn(counter.name, expected_counter_values)
+      expected_value = expected_counter_values[counter.name]
+      actual_value = counter.value()
+      self.assertGreater(actual_value, expected_value * 0.75)
+      self.assertLess(actual_value, expected_value * 1.25)
+
+  def test_sampler_transition_overhead(self):
+    # Set up state sampler.
+    counter_factory = CounterFactory()
+    sampler = statesampler.StateSampler('overhead-', counter_factory,
+                                        sampling_period_ms=10)
+
+    # Run basic workload transitioning between 3 states.
+    state_a = sampler.scoped_state('statea')
+    state_b = sampler.scoped_state('stateb')
+    state_c = sampler.scoped_state('statec')
+    start_time = time.time()
+    sampler.start()
+    for _ in range(100000):
+      with state_a:
+        with state_b:
+          for _ in range(10):
+            with state_c:
+              pass
+    sampler.stop()
+    elapsed_time = time.time() - start_time
+    state_transition_count = sampler.get_info().transition_count
+    overhead_us = 1000000.0 * elapsed_time / state_transition_count
+    logging.info('Overhead per transition: %fus', overhead_us)
+    # Conservative upper bound on overhead in microseconds (we expect this to
+    # take 0.17us when compiled in opt mode or 0.48us when compiled in
+    # debug mode).
+ self.assertLess(overhead_us, 10.0) + + +if __name__ == '__main__': + logging.getLogger().setLevel(logging.INFO) + unittest.main() + From 23d1faf4934b8a43b83f66b0232b72794a18c473 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Thu, 20 Apr 2017 14:53:26 -0700 Subject: [PATCH 03/33] cythonization works --- sdks/python/apache_beam/runners/worker/logger_test.py | 3 +-- sdks/python/apache_beam/runners/worker/opcounters_test.py | 3 +-- sdks/python/apache_beam/runners/worker/operations.pxd | 2 +- sdks/python/apache_beam/runners/worker/operations.py | 6 +++--- sdks/python/apache_beam/runners/worker/sdk_harness.py | 2 +- sdks/python/apache_beam/runners/worker/sideinputs_test.py | 4 +--- sdks/python/apache_beam/runners/worker/statesampler_test.py | 4 +--- sdks/python/setup.py | 5 ++++- 8 files changed, 13 insertions(+), 16 deletions(-) diff --git a/sdks/python/apache_beam/runners/worker/logger_test.py b/sdks/python/apache_beam/runners/worker/logger_test.py index 657a1de35679..04df1822196a 100644 --- a/sdks/python/apache_beam/runners/worker/logger_test.py +++ b/sdks/python/apache_beam/runners/worker/logger_test.py @@ -20,8 +20,7 @@ import threading import unittest -import google3 -from dataflow_worker import logger +from apache_beam.runners.worker import logger class PerThreadLoggingContextTest(unittest.TestCase): diff --git a/sdks/python/apache_beam/runners/worker/opcounters_test.py b/sdks/python/apache_beam/runners/worker/opcounters_test.py index 8f6aa7e36c4e..34965238cd1a 100644 --- a/sdks/python/apache_beam/runners/worker/opcounters_test.py +++ b/sdks/python/apache_beam/runners/worker/opcounters_test.py @@ -19,11 +19,10 @@ import random import unittest -import google3 from apache_beam import coders +from apache_beam.runners.worker.opcounters import OperationCounters from apache_beam.transforms.window import GlobalWindows from apache_beam.utils.counters import CounterFactory -from dataflow_worker.opcounters import OperationCounters # Classes to test that we can handle a variety of objects. 
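The operations.pxd hunk below is part of the same move. As background on why the rename must touch the .pxd as well: a .pxd file declares C-level attributes that Cython grafts onto the identically named .py module at compile time, so its cimports must resolve to the new package too, while the .py file keeps working uncompiled. A schematic, purely illustrative pair:

    # opcounters.pxd (schematic): typed declarations for the compiled build.
    cdef class OperationCounters(object):
      cdef public object element_counter
      cpdef update_from(self, windowed_value)

    # opcounters.py (schematic): still importable as plain Python.
    class OperationCounters(object):
      def __init__(self):
        self.element_counter = None

      def update_from(self, windowed_value):
        self.element_counter = windowed_value
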
diff --git a/sdks/python/apache_beam/runners/worker/operations.pxd b/sdks/python/apache_beam/runners/worker/operations.pxd index 06cd062bad4d..eee75a692b64 100644 --- a/sdks/python/apache_beam/runners/worker/operations.pxd +++ b/sdks/python/apache_beam/runners/worker/operations.pxd @@ -15,8 +15,8 @@ cimport cython from apache_beam.runners.common cimport Receiver +from apache_beam.runners.worker cimport opcounters from apache_beam.utils.windowed_value cimport WindowedValue -from dataflow_worker cimport opcounters from apache_beam.metrics.execution cimport ScopedMetricsContainer diff --git a/sdks/python/apache_beam/runners/worker/operations.py b/sdks/python/apache_beam/runners/worker/operations.py index 458b4bfafad6..5a5f4283e2e2 100644 --- a/sdks/python/apache_beam/runners/worker/operations.py +++ b/sdks/python/apache_beam/runners/worker/operations.py @@ -28,6 +28,9 @@ from apache_beam.runners import common from apache_beam.runners.common import Receiver from apache_beam.runners.dataflow.internal.names import PropertyNames +from apache_beam.runners.worker import logger +from apache_beam.runners.worker import operation_specs +from apache_beam.runners.worker import sideinputs from apache_beam.transforms import combiners from apache_beam.transforms import core from apache_beam.transforms import sideinputs as apache_sideinputs @@ -36,9 +39,6 @@ from apache_beam.transforms.window import GlobalWindows from apache_beam.utils.windowed_value import WindowedValue import cython -from dataflow_worker import logger -from dataflow_worker import operation_specs -from dataflow_worker import sideinputs _globally_windowed_value = GlobalWindows.windowed_value(None) _global_window_type = type(_globally_windowed_value.windows[0]) diff --git a/sdks/python/apache_beam/runners/worker/sdk_harness.py b/sdks/python/apache_beam/runners/worker/sdk_harness.py index c7fcccbc3a93..b6fad566d5b5 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_harness.py +++ b/sdks/python/apache_beam/runners/worker/sdk_harness.py @@ -38,7 +38,7 @@ from dataflow_worker.fn_harness.data_plane import GrpcClientDataChannelFactory import dill -from google3.google.protobuf import wrappers_pb2 +from google.protobuf import wrappers_pb2 DATA_INPUT_URN = 'urn:org.apache.beam:source:runner:0.1' diff --git a/sdks/python/apache_beam/runners/worker/sideinputs_test.py b/sdks/python/apache_beam/runners/worker/sideinputs_test.py index 8c54fe8b8c88..53cfa87deb7d 100644 --- a/sdks/python/apache_beam/runners/worker/sideinputs_test.py +++ b/sdks/python/apache_beam/runners/worker/sideinputs_test.py @@ -18,9 +18,7 @@ import time import unittest -import google3 - -from dataflow_worker import sideinputs +from apache_beam.runners.worker import sideinputs def strip_windows(iterator): diff --git a/sdks/python/apache_beam/runners/worker/statesampler_test.py b/sdks/python/apache_beam/runners/worker/statesampler_test.py index 1e9224a3a1e6..955b06bf8284 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler_test.py +++ b/sdks/python/apache_beam/runners/worker/statesampler_test.py @@ -18,10 +18,8 @@ import time import unittest -import google3 - +from apache_beam.runners.worker import statesampler from apache_beam.utils.counters import CounterFactory -from dataflow_worker import statesampler class StateSamplerTest(unittest.TestCase): diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 615931b3c90e..375358cfbeb2 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -125,8 +125,11 @@ def get_version(): ext_modules=cythonize([ '**/*.pyx', 
'apache_beam/coders/coder_impl.py', - 'apache_beam/runners/common.py', 'apache_beam/metrics/execution.py', + 'apache_beam/runners/common.py', + 'apache_beam/runners/worker/logger.py', + 'apache_beam/runners/worker/opcounters.py', + 'apache_beam/runners/worker/operations.py', 'apache_beam/transforms/cy_combiners.py', 'apache_beam/utils/counters.py', 'apache_beam/utils/windowed_value.py', From bfae5730aeac394b9b16284cfca7c4ba33369de4 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Thu, 20 Apr 2017 15:07:37 -0700 Subject: [PATCH 04/33] move sdk_harness to sdk_worker --- .../apache_beam/runners/worker/{sdk_harness.py => sdk_worker.py} | 0 .../runners/worker/{sdk_harness_test.py => sdk_worker_test.py} | 0 2 files changed, 0 insertions(+), 0 deletions(-) rename sdks/python/apache_beam/runners/worker/{sdk_harness.py => sdk_worker.py} (100%) rename sdks/python/apache_beam/runners/worker/{sdk_harness_test.py => sdk_worker_test.py} (100%) diff --git a/sdks/python/apache_beam/runners/worker/sdk_harness.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py similarity index 100% rename from sdks/python/apache_beam/runners/worker/sdk_harness.py rename to sdks/python/apache_beam/runners/worker/sdk_worker.py diff --git a/sdks/python/apache_beam/runners/worker/sdk_harness_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py similarity index 100% rename from sdks/python/apache_beam/runners/worker/sdk_harness_test.py rename to sdks/python/apache_beam/runners/worker/sdk_worker_test.py From f1a97297f53b54a64088b77119bec1a5d49c6c36 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Thu, 20 Apr 2017 15:09:33 -0700 Subject: [PATCH 05/33] Remove unused end_time from statesampler. --- sdks/python/apache_beam/runners/worker/statesampler.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/sdks/python/apache_beam/runners/worker/statesampler.pyx b/sdks/python/apache_beam/runners/worker/statesampler.pyx index 6f176b07a39f..a311078b2f5f 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler.pyx +++ b/sdks/python/apache_beam/runners/worker/statesampler.pyx @@ -151,7 +151,6 @@ cdef class StateSampler(object): last_nsecs += elapsed_nsecs finally: pythread.PyThread_release_lock(self.lock) - self.end_time = time.time() def start(self): assert not self.started From 64179100caf6aeb32fcab1741bb89e710d8e59cf Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Fri, 21 Apr 2017 12:36:27 -0700 Subject: [PATCH 06/33] implement portpicker, fix more imports --- .../apache_beam/runners/worker/data_plane.py | 2 +- .../runners/worker/data_plane_test.py | 9 +++--- .../apache_beam/runners/worker/log_handler.py | 2 +- .../runners/worker/log_handler_test.py | 9 +++--- .../apache_beam/runners/worker/sdk_worker.py | 16 +++++----- .../runners/worker/sdk_worker_main.py | 8 ++--- .../runners/worker/sdk_worker_test.py | 15 ++++----- sdks/python/apache_beam/utils/portpicker.py | 31 +++++++++++++++++++ 8 files changed, 63 insertions(+), 29 deletions(-) create mode 100644 sdks/python/apache_beam/utils/portpicker.py diff --git a/sdks/python/apache_beam/runners/worker/data_plane.py b/sdks/python/apache_beam/runners/worker/data_plane.py index 892f33c69bb1..cde37cd0b156 100644 --- a/sdks/python/apache_beam/runners/worker/data_plane.py +++ b/sdks/python/apache_beam/runners/worker/data_plane.py @@ -24,7 +24,7 @@ import threading from apache_beam.coders import coder_impl -from dataflow_worker.fn_harness import beam_fn_api_pb2 +from apache_beam.runners.api import beam_fn_api_pb2 import grpc diff --git 
a/sdks/python/apache_beam/runners/worker/data_plane_test.py b/sdks/python/apache_beam/runners/worker/data_plane_test.py index abd04171d7be..51bbe9dd42f0 100644 --- a/sdks/python/apache_beam/runners/worker/data_plane_test.py +++ b/sdks/python/apache_beam/runners/worker/data_plane_test.py @@ -22,11 +22,12 @@ import threading import unittest -from concurrent import futures -from dataflow_worker.fn_harness import beam_fn_api_pb2 -from dataflow_worker.fn_harness import data_plane import grpc -import portpicker +from concurrent import futures + +from apache_beam.runners.api import beam_fn_api_pb2 +from apache_beam.runners.worker import data_plane +from apache_beam.utils import portpicker def timeout(timeout_secs): diff --git a/sdks/python/apache_beam/runners/worker/log_handler.py b/sdks/python/apache_beam/runners/worker/log_handler.py index 23e7fa940c2e..8654628a5cb0 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler.py +++ b/sdks/python/apache_beam/runners/worker/log_handler.py @@ -18,7 +18,7 @@ import Queue as queue import threading -from dataflow_worker.fn_harness import beam_fn_api_pb2 +from apache_beam.runners.api import beam_fn_api_pb2 import grpc diff --git a/sdks/python/apache_beam/runners/worker/log_handler_test.py b/sdks/python/apache_beam/runners/worker/log_handler_test.py index 3e6fc097fe6a..cc6982084d84 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler_test.py +++ b/sdks/python/apache_beam/runners/worker/log_handler_test.py @@ -14,11 +14,12 @@ import logging import unittest -from concurrent import futures -from dataflow_worker.fn_harness import beam_fn_api_pb2 -from dataflow_worker.fn_harness import log_handler import grpc -import portpicker +from concurrent import futures + +from apache_beam.runners.api import beam_fn_api_pb2 +from apache_beam.runners.worker import log_handler +from apache_beam.utils import portpicker class BeamFnLoggingServicer(beam_fn_api_pb2.BeamFnLoggingServicer): diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py index b6fad566d5b5..83af2f7e237d 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py @@ -26,19 +26,19 @@ import traceback import zlib +import dill +from google.protobuf import wrappers_pb2 + from apache_beam.coders import coder_impl from apache_beam.coders import WindowedValueCoder from apache_beam.internal import pickler from apache_beam.runners.dataflow.native_io import iobase from apache_beam.utils import counters -from dataflow_worker import operation_specs -from dataflow_worker import operations -from dataflow_worker import statesampler -from dataflow_worker.fn_harness import beam_fn_api_pb2 -from dataflow_worker.fn_harness.data_plane import GrpcClientDataChannelFactory -import dill - -from google.protobuf import wrappers_pb2 +from apache_beam.runners.api import beam_fn_api_pb2 +from apache_beam.runners.worker import operation_specs +from apache_beam.runners.worker import operations +from apache_beam.runners.worker import statesampler +from apache_beam.runners.worker.data_plane import GrpcClientDataChannelFactory DATA_INPUT_URN = 'urn:org.apache.beam:source:runner:0.1' diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_main.py b/sdks/python/apache_beam/runners/worker/sdk_worker_main.py index 3b4159782ca4..12870ac03388 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker_main.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker_main.py @@ -18,13 +18,13 
@@ import os import sys -from dataflow_worker.fn_harness import beam_fn_api_pb2 -from dataflow_worker.fn_harness.log_handler import FnApiLogRecordHandler -from dataflow_worker.fn_harness.sdk_harness import SdkHarness import grpc - from google.protobuf import text_format +from apache_beam.runners.api import beam_fn_api_pb2 +from apache_beam.runners.worker.log_handler import FnApiLogRecordHandler +from apache_beam.runners.worker.sdk_worker import SdkHarness + def main(unused_argv): """Main entry point for SDK Fn Harness.""" diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py index 41453b93530c..4af937ec136d 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py @@ -21,14 +21,15 @@ import logging import unittest +import grpc +from concurrent import futures + from apache_beam.io.concat_source_test import RangeSource from apache_beam.io.iobase import SourceBundle -from concurrent import futures -from dataflow_worker.fn_harness import beam_fn_api_pb2 -from dataflow_worker.fn_harness import sdk_harness -from dataflow_worker.fn_harness.data_plane import GrpcClientDataChannelFactory -import grpc -import portpicker +from apache_beam.runners.api import beam_fn_api_pb2 +from apache_beam.runners.worker import sdk_worker +from apache_beam.runners.worker.data_plane import GrpcClientDataChannelFactory +from apache_beam.utils import portpicker class BeamFnControlServicer(beam_fn_api_pb2.BeamFnControlServicer): @@ -81,7 +82,7 @@ def test_fn_registration(self): server.start() channel = grpc.insecure_channel("localhost:%s" % test_port) - harness = sdk_harness.SdkHarness(channel) + harness = sdk_worker.SdkHarness(channel) harness.run() self.assertEqual( harness.worker.fns, diff --git a/sdks/python/apache_beam/utils/portpicker.py b/sdks/python/apache_beam/utils/portpicker.py new file mode 100644 index 000000000000..5c22c8a414a5 --- /dev/null +++ b/sdks/python/apache_beam/utils/portpicker.py @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import contextlib +import socket + +def pick_unused_port(): + """Attempts to return an unused port. + + There is a window where the port could get re-used after it is closed + but before the caller has a chance to bind it again, but that's the + best we can do for libraries expecting a port number. 
+  """
+  with contextlib.closing(
+      socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s:
+    s.bind(('localhost', 0))
+    return s.getsockname()[1]

From 854caa52ce23af911bdc01a81457df106299c39b Mon Sep 17 00:00:00 2001
From: Robert Bradshaw
Date: Fri, 21 Apr 2017 14:38:12 -0700
Subject: [PATCH 07/33] disable simple state creation

---
 sdks/python/apache_beam/runners/worker/sdk_worker.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py
index 83af2f7e237d..fa818fc45a89 100644
--- a/sdks/python/apache_beam/runners/worker/sdk_worker.py
+++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py
@@ -204,8 +204,8 @@ def run(self):
     contol_stub = beam_fn_api_pb2.BeamFnControlStub(self._control_channel)
-    state_stub = beam_fn_api_pb2.SimpleBeamFnStateStub(
-        self._control_channel)
+    # TODO(robertwb): Wire up to new state api.
+    state_stub = None
     self.worker = SdkWorker(state_stub, self._data_channel_factory)
 
     responses = queue.Queue()

From 0907f795ee822151cda6f89acccbaf165238c11e Mon Sep 17 00:00:00 2001
From: Robert Bradshaw
Date: Fri, 21 Apr 2017 14:43:28 -0700
Subject: [PATCH 08/33] more package renames

---
 .../runners/portability/maptask_executor_runner_test.py  | 4 +---
 sdks/python/apache_beam/runners/worker/data_plane_test.py | 2 +-
 sdks/python/apache_beam/runners/worker/sdk_worker_test.py | 4 ++--
 3 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py b/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py
index f45976c9002b..5f747787cbd4 100644
--- a/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py
+++ b/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py
@@ -4,8 +4,6 @@
 import tempfile
 import unittest
 
-import google3
-
 import apache_beam as beam
 
 from apache_beam.metrics import Metrics
@@ -18,7 +16,7 @@
 from apache_beam.transforms.util import BeamAssertException
 from apache_beam.transforms.util import equal_to
 from apache_beam.transforms.window import TimestampedValue
-from dataflow_worker import worker_runner_base
+from apache_beam.runners.portability import worker_runner_base
 
 
 class WorkerRunnerBaseTest(unittest.TestCase):

diff --git a/sdks/python/apache_beam/runners/worker/data_plane_test.py b/sdks/python/apache_beam/runners/worker/data_plane_test.py
index 51bbe9dd42f0..83116d57ef16 100644
--- a/sdks/python/apache_beam/runners/worker/data_plane_test.py
+++ b/sdks/python/apache_beam/runners/worker/data_plane_test.py
@@ -11,7 +11,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-"""Tests for dataflow_worker.fn_harness.data_plane."""
+"""Tests for apache_beam.runners.worker.data_plane."""
 
 from __future__ import absolute_import
 from __future__ import division

diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py
index 4af937ec136d..e7493ba5ef65 100644
--- a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py
+++ b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-"""Tests for dataflow_worker.fn_harness.sdk_harness.""" +"""Tests for apache_beam.runners.worker.sdk_worker.""" from __future__ import absolute_import from __future__ import division @@ -59,7 +59,7 @@ def Control(self, response_iterator, context): (self.instruction_ids - set(self.responses.keys()))) -class SdkHarnessTest(unittest.TestCase): +class SdkWorkerTest(unittest.TestCase): def test_fn_registration(self): From 052792296cd3add45030ad08e70d2136811afb8e Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Fri, 21 Apr 2017 15:09:54 -0700 Subject: [PATCH 09/33] Disable unsupported metrics. --- .../runners/portability/fn_api_runner.py | 53 ++++++++++--------- .../runners/portability/fn_api_runner_test.py | 12 ++--- .../portability/maptask_executor_runner.py | 48 ++++++++--------- .../maptask_executor_runner_test.py | 6 +-- 4 files changed, 57 insertions(+), 62 deletions(-) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py index 6224c75a801a..75bf261729e1 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py @@ -6,6 +6,9 @@ import Queue as queue import threading +import grpc +from concurrent import futures + import apache_beam as beam from apache_beam.coders import WindowedValueCoder from apache_beam.coders.coder_impl import create_InputStream @@ -14,14 +17,12 @@ from apache_beam.io import iobase from apache_beam.metrics.execution import MetricsEnvironment from apache_beam.transforms.window import GlobalWindows -from concurrent import futures -from dataflow_worker import operation_specs -from dataflow_worker import worker_runner_base -from dataflow_worker.fn_harness import beam_fn_api_pb2 -from dataflow_worker.fn_harness import data_plane -from dataflow_worker.fn_harness import sdk_harness -import grpc -import portpicker +from apache_beam.runners.api import beam_fn_api_pb2 +from apache_beam.runners.portability import maptask_executor_runner +from apache_beam.runners.worker import data_plane +from apache_beam.runners.worker import operation_specs +from apache_beam.runners.worker import sdk_worker +from apache_beam.utils import portpicker def streaming_rpc_handler(cls, method_name): @@ -90,14 +91,14 @@ def process(self, source): beam.transforms.core.Windowing(GlobalWindows()))) -class SdkHarnessRunner(worker_runner_base.WorkerRunnerBase): +class SdkHarnessRunner(maptask_executor_runner.MapTaskExecutorRunner): def __init__(self): super(SdkHarnessRunner, self).__init__() self._last_uid = -1 - def _set_metrics_support(self): - MetricsEnvironment.set_metrics_supported(False) + def has_metrics_support(self): + return False def _next_uid(self): self._last_uid += 1 @@ -116,9 +117,9 @@ def _map_task_registration(self, map_task, state_handler, def coder_id(coder): if coder not in coders: coders[coder] = beam_fn_api_pb2.Coder( - function_spec=sdk_harness.pack_function_spec_data( + function_spec=sdk_worker.pack_function_spec_data( json.dumps(coder.as_cloud_object()), - sdk_harness.PYTHON_CODER_URN, id=self._next_uid())) + sdk_worker.PYTHON_CODER_URN, id=self._next_uid())) return coders[coder].function_spec.id @@ -148,7 +149,7 @@ def outputs(op): transform_id = transform_index_to_id[op_ix] = self._next_uid() if isinstance(operation, operation_specs.WorkerInMemoryWrite): # Write this data back to the runner. 
- fn = beam_fn_api_pb2.FunctionSpec(urn=sdk_harness.DATA_OUTPUT_URN, + fn = beam_fn_api_pb2.FunctionSpec(urn=sdk_worker.DATA_OUTPUT_URN, id=self._next_uid()) if data_operation_spec: fn.data.Pack(data_operation_spec) @@ -173,7 +174,7 @@ def outputs(op): element_coder.encode_to_stream(element, output_stream, True) target_name = '%s' % self._next_uid() input_data[(transform_id, target_name)] = output_stream.get() - fn = beam_fn_api_pb2.FunctionSpec(urn=sdk_harness.DATA_INPUT_URN, + fn = beam_fn_api_pb2.FunctionSpec(urn=sdk_worker.DATA_INPUT_URN, id=self._next_uid()) if data_operation_spec: fn.data.Pack(data_operation_spec) @@ -193,7 +194,7 @@ def outputs(op): input_ptransform = beam_fn_api_pb2.PrimitiveTransform( id=input_transform_id, function_spec=beam_fn_api_pb2.FunctionSpec( - urn=sdk_harness.DATA_INPUT_URN, + urn=sdk_worker.DATA_INPUT_URN, id=self._next_uid()), # TODO(robertwb): Possible name collision. step_name=stage_name + '/inject_source', @@ -208,9 +209,9 @@ def outputs(op): transforms.append(input_ptransform) # Read the elements out of the source. - fn = sdk_harness.pack_function_spec_data( + fn = sdk_worker.pack_function_spec_data( OLDE_SOURCE_SPLITTABLE_DOFN_DATA, - sdk_harness.PYTHON_DOFN_URN, + sdk_worker.PYTHON_DOFN_URN, id=self._next_uid()) inputs = { 'ignored_input_tag': @@ -223,9 +224,9 @@ def outputs(op): side_inputs = {} elif isinstance(operation, operation_specs.WorkerDoFn): - fn = sdk_harness.pack_function_spec_data( + fn = sdk_worker.pack_function_spec_data( operation.serialized_fn, - sdk_harness.PYTHON_DOFN_URN, + sdk_worker.PYTHON_DOFN_URN, id=self._next_uid()) inputs = as_target(operation.input) # Store the contents of each side input for state access. @@ -235,8 +236,8 @@ def outputs(op): view_id = self._next_uid() # TODO(robertwb): Actually flesh out the ViewFn API. 
side_inputs[si.tag] = beam_fn_api_pb2.SideInput( - view_fn=sdk_harness.serialize_and_pack_py_fn( - element_coder, urn=sdk_harness.PYTHON_ITERABLE_VIEWFN_URN, + view_fn=sdk_worker.serialize_and_pack_py_fn( + element_coder, urn=sdk_worker.PYTHON_ITERABLE_VIEWFN_URN, id=view_id)) # Re-encode the elements in the nested context and # concatenate them together @@ -253,9 +254,9 @@ def outputs(op): state_key=state_key, data=[elements_data])) elif isinstance(operation, operation_specs.WorkerFlatten): - fn = sdk_harness.pack_function_spec_data( + fn = sdk_worker.pack_function_spec_data( operation.serialized_fn, - sdk_harness.IDENTITY_DOFN_URN, + sdk_worker.IDENTITY_DOFN_URN, id=self._next_uid()) inputs = { 'ignored_input_tag': @@ -387,7 +388,7 @@ def __init__(self): self.state_handler = SdkHarnessRunner.SimpleState() self.control_handler = self self.data_plane_handler = data_plane.InMemoryDataChannel() - self.worker = sdk_harness.SdkWorker( + self.worker = sdk_worker.SdkWorker( self.state_handler, data_plane.InMemoryDataChannelFactory( self.data_plane_handler.inverse())) @@ -437,7 +438,7 @@ def __init__(self): self.data_server.start() self.control_server.start() - self.worker = sdk_harness.SdkHarness( + self.worker = sdk_worker.SdkHarness( grpc.insecure_channel('localhost:%s' % self.control_port)) self.worker_thread = threading.Thread(target=self.worker.run) logging.info('starting worker') diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py index 9d700f0e87c3..2de12d6c92a8 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py @@ -3,18 +3,15 @@ import logging import unittest -import google3 - import apache_beam as beam -from dataflow_worker import worker_runner_base_test -from dataflow_worker.fn_harness import sdk_harness_runner -import faulthandler +from apache_beam.runners.portability import worker_runner_base_test +from apache_beam.runners.portability import fn_api_runner -class SdkHarnessRunnerTest(worker_runner_base_test.WorkerRunnerBaseTest): +class SdkWorkerRunnerTest(worker_runner_base_test.MapTaskExecutorRunner): def create_pipeline(self): - return beam.Pipeline(runner=sdk_harness_runner.SdkHarnessRunner()) + return beam.Pipeline(runner=sdk_harness_runner.SdkWorkerRunner()) def test_combine_per_key(self): # TODO(robertwb): Implement PGBKCV operation. 
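Zooming out from the renames in this hunk: the registration code above describes each coder and DoFn to the harness as a FunctionSpec, that is, a URN naming the payload format plus the serialized payload packed into the spec's data field (a protobuf Any). A rough sketch of that pattern, with an illustrative URN and payload:

    from google.protobuf import wrappers_pb2
    from apache_beam.runners.api import beam_fn_api_pb2

    # Illustrative payload; the real code packs pickled fns or coder JSON.
    payload = wrappers_pb2.BytesValue(value=b'serialized-dofn')
    fn = beam_fn_api_pb2.FunctionSpec(urn='urn:example:python_dofn', id=42)
    fn.data.Pack(payload)  # The harness chooses an unpacker based on fn.urn.
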
@@ -25,5 +22,4 @@ def test_combine_per_key(self): if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) - faulthandler.enable() unittest.main() diff --git a/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py b/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py index 7e1306562441..7401b3fe6ae3 100644 --- a/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py +++ b/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py @@ -4,8 +4,6 @@ import logging import time -import google3 - import apache_beam as beam from apache_beam.internal import pickler from apache_beam.io import iobase @@ -16,16 +14,17 @@ from apache_beam.runners.runner import PipelineResult from apache_beam.runners.runner import PipelineRunner from apache_beam.runners.runner import PipelineState +from apache_beam.runners.worker import operation_specs +from apache_beam.runners.worker import operation_specs as maptask +from apache_beam.runners.worker import operations +from apache_beam.runners.worker import statesampler from apache_beam.typehints import typehints from apache_beam.utils import pipeline_options from apache_beam.utils import profiler from apache_beam.utils.counters import CounterFactory -from dataflow_worker import operation_specs as maptask -from dataflow_worker import operations -from dataflow_worker import statesampler -class WorkerRunnerBase(PipelineRunner): +class MapTaskExecutorRunner(PipelineRunner): """Beam runner translating a pipeline into map tasks that are then executed. Primarily intended for testing and profiling the worker code paths. @@ -34,17 +33,15 @@ class WorkerRunnerBase(PipelineRunner): def __init__(self): self.executors = [] - def _set_metrics_support(self): - """Set whether this runner supports metrics or not. - - Can be set to False by subclass runners. + def has_metrics_support(self): + """Returns whether this runner supports metrics or not. """ - MetricsEnvironment.set_metrics_supported(True) + return False def run(self, pipeline): - self._set_metrics_support() + MetricsEnvironment.set_metrics_supported(self.has_metrics_support()) # List of map tasks Each map task is a list of - # (stage_name, maptask.WorkerOperation) instructions. + # (stage_name, operation_specs.WorkerOperation) instructions. self.map_tasks = [] # Map of pvalues to @@ -59,7 +56,7 @@ def run(self, pipeline): self.dependencies = collections.defaultdict(set) # Visit the graph, building up the map_tasks and their metadata. - super(WorkerRunnerBase, self).run(pipeline) + super(MapTaskExecutorRunner, self).run(pipeline) # Now run the tasks in topological order. 
def compute_depth_map(deps): @@ -111,7 +108,7 @@ def execute_map_tasks(self, ordered_map_tasks): counter_factory = CounterFactory() state_sampler = statesampler.StateSampler('%s-' % ix, counter_factory) map_executor = operations.SimpleMapTaskExecutor( - maptask.MapTask( + operation_specs.MapTask( all_operations, 'S%02d' % ix, system_names, stage_names), counter_factory, state_sampler) @@ -130,7 +127,7 @@ def _run_read_from(self, transform_node, source): source = iobase.SourceBundle(1.0, source, None, None) output = transform_node.outputs[None] element_coder = self._get_coder(output) - read_op = maptask.WorkerRead(source, output_coders=[element_coder]) + read_op = operation_specs.WorkerRead(source, output_coders=[element_coder]) self.outputs[output] = len(self.map_tasks), 0, 0 self.map_tasks.append([(transform_node.full_label, read_op)]) return len(self.map_tasks) - 1 @@ -157,7 +154,7 @@ def is_reachable(leaf, root): output_buffer = OutputBuffer(input_element_coder) - fusion_break_write = maptask.WorkerInMemoryWrite( + fusion_break_write = operation_specs.WorkerInMemoryWrite( output_buffer=output_buffer, write_windowed_values=True, input=(producer_index, output_index), @@ -168,7 +165,7 @@ def is_reachable(leaf, root): original_map_task_index = map_task_index map_task_index, producer_index, output_index = len(self.map_tasks), 0, 0 - fusion_break_read = maptask.WorkerRead( + fusion_break_read = operation_specs.WorkerRead( output_buffer.source_bundle(), output_coders=[input_element_coder]) self.map_tasks.append( @@ -180,9 +177,10 @@ def create_side_read(side_input): label = self.side_input_labels[side_input] output_buffer = self.run_side_write( side_input.pvalue, '%s/%s' % (transform_node.full_label, label)) - return maptask.WorkerSideInputSource(output_buffer.source(), label) + return operation_specs.WorkerSideInputSource( + output_buffer.source(), label) - do_op = maptask.WorkerDoFn( # + do_op = operation_specs.WorkerDoFn( # serialized_fn=pickler.dumps(DataflowRunner._pardo_fn_data( transform_node, lambda side_input: self.side_input_labels[side_input])), @@ -212,7 +210,7 @@ def run_side_write(self, pcoll, label): windowed_element_coder = self._get_coder(pcoll) output_buffer = OutputBuffer(windowed_element_coder) - write_sideinput_op = maptask.WorkerInMemoryWrite( + write_sideinput_op = operation_specs.WorkerInMemoryWrite( output_buffer=output_buffer, write_windowed_values=True, input=(producer_index, output_index), @@ -229,7 +227,7 @@ def run_GroupByKeyOnly(self, transform_node): windowed_ungrouped_element_coder = self._get_coder(transform_node.inputs[0]) output_buffer = GroupingOutputBuffer(grouped_element_coder) - shuffle_write = maptask.WorkerInMemoryWrite( + shuffle_write = operation_specs.WorkerInMemoryWrite( output_buffer=output_buffer, write_windowed_values=False, input=(producer_index, output_index), @@ -249,7 +247,7 @@ def run_Flatten(self, transform_node): for input in transform_node.inputs: map_task_index, producer_index, output_index = self.outputs[input] element_coder = self._get_coder(input) - flatten_write = maptask.WorkerInMemoryWrite(output_buffer=output_buffer, + flatten_write = operation_specs.WorkerInMemoryWrite(output_buffer=output_buffer, write_windowed_values=True, input=(producer_index, output_index), @@ -270,7 +268,7 @@ def apply_CombinePerKey(self, transform, input): def run_PartialGroupByKeyCombineValues(self, transform_node): element_coder = self._get_coder(transform_node.outputs[None]) _, producer_index, output_index = self.outputs[transform_node.inputs[0]] - 
combine_op = maptask.WorkerPartialGroupByKey( + combine_op = operation_specs.WorkerPartialGroupByKey( combine_fn=pickler.dumps((transform_node.transform.combine_fn, (), {}, ())), output_coders=[element_coder], @@ -287,7 +285,7 @@ def _run_combine_transform(self, transform_node, phase): transform = transform_node.transform element_coder = self._get_coder(transform_node.outputs[None]) _, producer_index, output_index = self.outputs[transform_node.inputs[0]] - combine_op = maptask.WorkerCombineFn(serialized_fn=pickler.dumps( + combine_op = operation_specs.WorkerCombineFn(serialized_fn=pickler.dumps( (transform.combine_fn, (), {}, ())), phase=phase, output_coders=[element_coder], diff --git a/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py b/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py index 5f747787cbd4..cd20cc17d9e4 100644 --- a/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py @@ -16,13 +16,13 @@ from apache_beam.transforms.util import BeamAssertException from apache_beam.transforms.util import equal_to from apache_beam.transforms.window import TimestampedValue -from apache_beam.runners.portability import worker_runner_base +from apache_beam.runners.portability import maptask_executor_runner -class WorkerRunnerBaseTest(unittest.TestCase): +class MapTaskExecutorRunnerTest(unittest.TestCase): def create_pipeline(self): - return beam.Pipeline(runner=worker_runner_base.WorkerRunnerBase()) + return beam.Pipeline(runner=maptask_executor_runner.MapTaskExecutorRunner()) def test_assert_that(self): with self.assertRaises(BeamAssertException): From 55b64d598f1aeabb646b30b85e577ea61b1672f7 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Fri, 21 Apr 2017 15:15:07 -0700 Subject: [PATCH 10/33] Rename to FnApiRunner --- .../apache_beam/runners/portability/fn_api_runner.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py index 75bf261729e1..42bd7d1f2aca 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py @@ -91,10 +91,10 @@ def process(self, source): beam.transforms.core.Windowing(GlobalWindows()))) -class SdkHarnessRunner(maptask_executor_runner.MapTaskExecutorRunner): +class FnApiRunner(maptask_executor_runner.MapTaskExecutorRunner): def __init__(self): - super(SdkHarnessRunner, self).__init__() + super(FnApiRunner, self).__init__() self._last_uid = -1 def has_metrics_support(self): @@ -343,9 +343,9 @@ def _run_map_task( def execute_map_tasks(self, ordered_map_tasks, direct=True): if direct: - controller = SdkHarnessRunner.DirectController() + controller = FnApiRunner.DirectController() else: - controller = SdkHarnessRunner.GrpcController() + controller = FnApiRunner.GrpcController() try: for _, map_task in ordered_map_tasks: @@ -385,7 +385,7 @@ class DirectController(object): def __init__(self): self._responses = [] - self.state_handler = SdkHarnessRunner.SimpleState() + self.state_handler = FnApiRunner.SimpleState() self.control_handler = self self.data_plane_handler = data_plane.InMemoryDataChannel() self.worker = sdk_worker.SdkWorker( @@ -414,7 +414,7 @@ class GrpcController(object): """An grpc based controller for fn API control, state and data planes.""" def __init__(self): - 
self.state_handler = SdkHarnessRunner.SimpleState() + self.state_handler = FnApiRunner.SimpleState() self.control_server = grpc.server( futures.ThreadPoolExecutor(max_workers=10)) self.control_port = portpicker.pick_unused_port() From 9f7eefe0d2c080d6e8613d808909ac347afd7d38 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Fri, 21 Apr 2017 15:18:28 -0700 Subject: [PATCH 11/33] more import/name fixes --- .../apache_beam/runners/portability/fn_api_runner_test.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py index 2de12d6c92a8..10d0d288a94e 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py @@ -4,14 +4,14 @@ import unittest import apache_beam as beam -from apache_beam.runners.portability import worker_runner_base_test from apache_beam.runners.portability import fn_api_runner +from apache_beam.runners.portability import maptask_executor_runner -class SdkWorkerRunnerTest(worker_runner_base_test.MapTaskExecutorRunner): +class FnApiRunnerTest(maptask_executor_runner.MapTaskExecutorRunner): def create_pipeline(self): - return beam.Pipeline(runner=sdk_harness_runner.SdkWorkerRunner()) + return beam.Pipeline(runner=fn_api_runner.FnApiRunner()) def test_combine_per_key(self): # TODO(robertwb): Implement PGBKCV operation. From d9529e3d4540608f4c208ef6f3c47af2cce3e08f Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Fri, 21 Apr 2017 15:19:18 -0700 Subject: [PATCH 12/33] Remove GRPC inheritance. --- sdks/python/apache_beam/runners/portability/fn_api_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py index 42bd7d1f2aca..6257a0d52754 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py @@ -356,7 +356,7 @@ def execute_map_tasks(self, ordered_map_tasks, direct=True): finally: controller.close() - class SimpleState(beam_fn_api_pb2.SimpleBeamFnStateServicer): + class SimpleState(object): # TODO(robertwb): Inherit from GRPC servicer. def __init__(self): self._all = collections.defaultdict(list) From 128f187d5a3b61ac4435025c6a40b5c7f2a9f852 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Fri, 21 Apr 2017 16:59:13 -0700 Subject: [PATCH 13/33] Fix tests in non-compiled mode. 
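The fix consists of two small fallback shims, applied in the hunks below: a guarded import that substitutes the no-op statesampler_fake when the compiled sampler is unavailable, and a FakeCython stand-in so that cython.cast degrades to a pass-through. The pattern in isolation, as it appears in the hunks that follow:

    try:
      from apache_beam.runners.worker import statesampler
    except ImportError:
      from apache_beam.runners.worker import statesampler_fake as statesampler

    try:
      import cython
    except ImportError:
      class FakeCython(object):
        # In pure mode, cython.cast(type, value) is simply the identity.
        @staticmethod
        def cast(type, value):
          return value
      globals()['cython'] = FakeCython()
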
--- .../portability/maptask_executor_runner.py | 5 ++- .../apache_beam/runners/worker/data_plane.py | 1 + .../apache_beam/runners/worker/operations.py | 13 ++++++- .../apache_beam/runners/worker/sdk_worker.py | 6 +++- .../runners/worker/statesampler_fake.py | 34 +++++++++++++++++++ .../runners/worker/statesampler_test.py | 8 ++++- sdks/python/apache_beam/utils/portpicker.py | 1 + 7 files changed, 64 insertions(+), 4 deletions(-) create mode 100644 sdks/python/apache_beam/runners/worker/statesampler_fake.py diff --git a/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py b/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py index 7401b3fe6ae3..c1f0d58ee552 100644 --- a/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py +++ b/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py @@ -17,7 +17,10 @@ from apache_beam.runners.worker import operation_specs from apache_beam.runners.worker import operation_specs as maptask from apache_beam.runners.worker import operations -from apache_beam.runners.worker import statesampler +try: + from apache_beam.runners.worker import statesampler +except ImportError: + from apache_beam.runners.worker import statesampler_fake as statesampler from apache_beam.typehints import typehints from apache_beam.utils import pipeline_options from apache_beam.utils import profiler diff --git a/sdks/python/apache_beam/runners/worker/data_plane.py b/sdks/python/apache_beam/runners/worker/data_plane.py index cde37cd0b156..37e6369a2ee2 100644 --- a/sdks/python/apache_beam/runners/worker/data_plane.py +++ b/sdks/python/apache_beam/runners/worker/data_plane.py @@ -32,6 +32,7 @@ class ClosableOutputStream(type(coder_impl.create_OutputStream())): """A Outputstream for use with CoderImpls that has a close() method.""" def __init__(self, close_callback=None): + super(ClosableOutputStream, self).__init__() self._close_callback = close_callback def close(self): diff --git a/sdks/python/apache_beam/runners/worker/operations.py b/sdks/python/apache_beam/runners/worker/operations.py index 5a5f4283e2e2..c7c23adb303f 100644 --- a/sdks/python/apache_beam/runners/worker/operations.py +++ b/sdks/python/apache_beam/runners/worker/operations.py @@ -29,6 +29,7 @@ from apache_beam.runners.common import Receiver from apache_beam.runners.dataflow.internal.names import PropertyNames from apache_beam.runners.worker import logger +from apache_beam.runners.worker import opcounters from apache_beam.runners.worker import operation_specs from apache_beam.runners.worker import sideinputs from apache_beam.transforms import combiners @@ -38,7 +39,17 @@ from apache_beam.transforms.combiners import PhasedCombineFnExecutor from apache_beam.transforms.window import GlobalWindows from apache_beam.utils.windowed_value import WindowedValue -import cython + +# Allow some "pure mode" declarations. 
+try: + import cython +except ImportError: + class FakeCython(object): + @staticmethod + def cast(type, value): + return value + globals()['cython'] = FakeCython() + _globally_windowed_value = GlobalWindows.windowed_value(None) _global_window_type = type(_globally_windowed_value.windows[0]) diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py index fa818fc45a89..65c88a6f63f2 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py @@ -19,6 +19,7 @@ from __future__ import print_function import collections +import contextlib import json import logging import Queue as queue @@ -37,7 +38,10 @@ from apache_beam.runners.api import beam_fn_api_pb2 from apache_beam.runners.worker import operation_specs from apache_beam.runners.worker import operations -from apache_beam.runners.worker import statesampler +try: + from apache_beam.runners.worker import statesampler +except ImportError: + from apache_beam.runners.worker import statesampler_fake as statesampler from apache_beam.runners.worker.data_plane import GrpcClientDataChannelFactory diff --git a/sdks/python/apache_beam/runners/worker/statesampler_fake.py b/sdks/python/apache_beam/runners/worker/statesampler_fake.py new file mode 100644 index 000000000000..f354a8324613 --- /dev/null +++ b/sdks/python/apache_beam/runners/worker/statesampler_fake.py @@ -0,0 +1,34 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + + +class StateSampler(object): + + def __init__(*args, **kwargs): + pass + + def scoped_state(self, name): + return _FakeScopedState() + + +class _FakeScopedState(object): + + def __enter__(self): + pass + + def __exit__(self, *unused_args): + pass diff --git a/sdks/python/apache_beam/runners/worker/statesampler_test.py b/sdks/python/apache_beam/runners/worker/statesampler_test.py index 955b06bf8284..3331c5c5c5d7 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler_test.py +++ b/sdks/python/apache_beam/runners/worker/statesampler_test.py @@ -18,7 +18,6 @@ import time import unittest -from apache_beam.runners.worker import statesampler from apache_beam.utils.counters import CounterFactory @@ -84,6 +83,13 @@ def test_sampler_transition_overhead(self): self.assertLess(overhead_us, 10.0) +try: + from apache_beam.runners.worker import statesampler +except ImportError: + # No compiler. 
+ del StateSamplerTest + + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) unittest.main() diff --git a/sdks/python/apache_beam/utils/portpicker.py b/sdks/python/apache_beam/utils/portpicker.py index 5c22c8a414a5..0a5bd5147b34 100644 --- a/sdks/python/apache_beam/utils/portpicker.py +++ b/sdks/python/apache_beam/utils/portpicker.py @@ -18,6 +18,7 @@ import contextlib import socket + def pick_unused_port(): """Attempts to return an unused port. From 8c69775ab1b3e9bae395b9b82e467e8309dba288 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 24 Apr 2017 16:23:35 -0500 Subject: [PATCH 14/33] lint --- sdks/python/apache_beam/runners/worker/data_plane_test.py | 1 + sdks/python/apache_beam/runners/worker/sdk_worker.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sdks/python/apache_beam/runners/worker/data_plane_test.py b/sdks/python/apache_beam/runners/worker/data_plane_test.py index 83116d57ef16..571af9ef7819 100644 --- a/sdks/python/apache_beam/runners/worker/data_plane_test.py +++ b/sdks/python/apache_beam/runners/worker/data_plane_test.py @@ -33,6 +33,7 @@ def timeout(timeout_secs): def decorate(fn): exc_info = [] + def wrapper(*args, **kwargs): def call_fn(): try: diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py index 65c88a6f63f2..bd39c5eb1b53 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py @@ -214,6 +214,7 @@ def run(self): responses = queue.Queue() no_more_work = object() + def get_responses(): while True: response = responses.get() @@ -446,5 +447,3 @@ def process_bundle(self, request, instruction_id): op.finish() return beam_fn_api_pb2.ProcessBundleResponse() - - From 94598b6849f9151507885e30ed7dc6d67163958e Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 24 Apr 2017 17:37:44 -0500 Subject: [PATCH 15/33] lint --- .../apache_beam/runners/worker/logger_test.py | 6 ------ .../python/apache_beam/runners/worker/operations.py | 13 +++---------- .../python/apache_beam/runners/worker/sideinputs.py | 1 + 3 files changed, 4 insertions(+), 16 deletions(-) diff --git a/sdks/python/apache_beam/runners/worker/logger_test.py b/sdks/python/apache_beam/runners/worker/logger_test.py index 04df1822196a..2d90a1716136 100644 --- a/sdks/python/apache_beam/runners/worker/logger_test.py +++ b/sdks/python/apache_beam/runners/worker/logger_test.py @@ -32,11 +32,6 @@ def thread_check_attribute(self, name): logger.per_thread_worker_data.get_data()[name], 'thread-value') self.assertFalse(name in logger.per_thread_worker_data.get_data()) - def test_no_positional_args(self): - with self.assertRaises(TypeError): - with logger.PerThreadLoggingContext('something'): - pass - def test_per_thread_attribute(self): self.assertFalse('xyz' in logger.per_thread_worker_data.get_data()) with logger.PerThreadLoggingContext(xyz='value'): @@ -181,4 +176,3 @@ def test_exception_record(self): if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) unittest.main() - diff --git a/sdks/python/apache_beam/runners/worker/operations.py b/sdks/python/apache_beam/runners/worker/operations.py index c7c23adb303f..da4adce32d86 100644 --- a/sdks/python/apache_beam/runners/worker/operations.py +++ b/sdks/python/apache_beam/runners/worker/operations.py @@ -240,10 +240,6 @@ def __missing__(self, unused_key): class DoOperation(Operation): """A Do operation that will execute a custom DoFn for each input element.""" - def 
__init__(self, operation_name, spec, counter_factory, state_sampler): - super(DoOperation, self).__init__( - operation_name, spec, counter_factory, state_sampler) - def _read_side_inputs(self, tags_and_types): """Generator reading side inputs in the order prescribed by tags_and_types. @@ -502,10 +498,6 @@ class FlattenOperation(Operation): with all the items. """ - def __init__(self, operation_name, spec, counter_factory, state_sampler): - super(FlattenOperation, self).__init__( - operation_name, spec, counter_factory, state_sampler) - def process(self, o): if self.debug_logging_enabled: logging.debug('Processing [%s] in %s', o, self) @@ -621,8 +613,9 @@ def execute(self): operation_specs.Worker* classes. """ - # operations is a list of operation_specs.Worker* instances. The order of the - # elements is important because the inputs use list indexes as references. + # operations is a list of operation_specs.Worker* instances. + # The order of the elements is important because the inputs use + # list indexes as references. step_names = ( self._map_task.step_names or [None] * len(self._map_task.operations)) diff --git a/sdks/python/apache_beam/runners/worker/sideinputs.py b/sdks/python/apache_beam/runners/worker/sideinputs.py index cc9e5e23898d..76981669cb56 100644 --- a/sdks/python/apache_beam/runners/worker/sideinputs.py +++ b/sdks/python/apache_beam/runners/worker/sideinputs.py @@ -74,6 +74,7 @@ def _start_reader_threads(self): self.reader_threads.append(t) def _reader_thread(self): + # pylint: disable=too-many-nested-blocks try: while True: try: From c7a61bd511cc2c96298cdbf491408ceb8624d91e Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 24 Apr 2017 17:38:34 -0500 Subject: [PATCH 16/33] fake state sampler lint --- sdks/python/apache_beam/runners/worker/statesampler_fake.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/runners/worker/statesampler_fake.py b/sdks/python/apache_beam/runners/worker/statesampler_fake.py index f354a8324613..efd7f2db8fcd 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler_fake.py +++ b/sdks/python/apache_beam/runners/worker/statesampler_fake.py @@ -18,7 +18,7 @@ class StateSampler(object): - def __init__(*args, **kwargs): + def __init__(self, *args, **kwargs): pass def scoped_state(self, name): From 28a00a1548f3f6f5342940a94964933e082122ec Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 24 Apr 2017 17:53:47 -0500 Subject: [PATCH 17/33] more lint --- .../runners/portability/fn_api_runner.py | 3 +- .../portability/maptask_executor_runner.py | 26 ++++++------- .../maptask_executor_runner_test.py | 37 +++++++++++-------- .../runners/worker/operation_specs.py | 2 +- .../apache_beam/runners/worker/sdk_worker.py | 1 - .../runners/worker/statesampler_test.py | 1 - 6 files changed, 36 insertions(+), 34 deletions(-) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py index 6257a0d52754..5a68c4e473eb 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py @@ -15,7 +15,6 @@ from apache_beam.coders.coder_impl import create_OutputStream from apache_beam.internal import pickler from apache_beam.io import iobase -from apache_beam.metrics.execution import MetricsEnvironment from apache_beam.transforms.window import GlobalWindows from apache_beam.runners.api import beam_fn_api_pb2 from apache_beam.runners.portability import 
maptask_executor_runner @@ -320,7 +319,7 @@ def _run_map_task( expected_targets = [ beam_fn_api_pb2.Target(primitive_transform_reference=transform_id, name=output_name) - for (transform_id, output_name) in sinks.keys()] + for (transform_id, output_name), _ in sinks.items()] for output in data_plane_handler.input_elements( process_bundle.instruction_id, expected_targets): target_tuple = ( diff --git a/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py b/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py index c1f0d58ee552..ec42bf5320b1 100644 --- a/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py +++ b/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py @@ -15,7 +15,6 @@ from apache_beam.runners.runner import PipelineRunner from apache_beam.runners.runner import PipelineState from apache_beam.runners.worker import operation_specs -from apache_beam.runners.worker import operation_specs as maptask from apache_beam.runners.worker import operations try: from apache_beam.runners.worker import statesampler @@ -250,11 +249,11 @@ def run_Flatten(self, transform_node): for input in transform_node.inputs: map_task_index, producer_index, output_index = self.outputs[input] element_coder = self._get_coder(input) - flatten_write = operation_specs.WorkerInMemoryWrite(output_buffer=output_buffer, - write_windowed_values=True, - input=(producer_index, - output_index), - output_coders=[element_coder]) + flatten_write = operation_specs.WorkerInMemoryWrite( + output_buffer=output_buffer, + write_windowed_values=True, + input=(producer_index, output_index), + output_coders=[element_coder]) self.map_tasks[map_task_index].append( (transform_node.full_label + '/Write', flatten_write)) self.dependencies[output_map_task].add(map_task_index) @@ -272,8 +271,8 @@ def run_PartialGroupByKeyCombineValues(self, transform_node): element_coder = self._get_coder(transform_node.outputs[None]) _, producer_index, output_index = self.outputs[transform_node.inputs[0]] combine_op = operation_specs.WorkerPartialGroupByKey( - combine_fn=pickler.dumps((transform_node.transform.combine_fn, (), {}, - ())), + combine_fn=pickler.dumps( + (transform_node.transform.combine_fn, (), {}, ())), output_coders=[element_coder], input=(producer_index, output_index)) self._run_as_op(transform_node, combine_op) @@ -288,11 +287,12 @@ def _run_combine_transform(self, transform_node, phase): transform = transform_node.transform element_coder = self._get_coder(transform_node.outputs[None]) _, producer_index, output_index = self.outputs[transform_node.inputs[0]] - combine_op = operation_specs.WorkerCombineFn(serialized_fn=pickler.dumps( - (transform.combine_fn, (), {}, ())), - phase=phase, - output_coders=[element_coder], - input=(producer_index, output_index)) + combine_op = operation_specs.WorkerCombineFn( + serialized_fn=pickler.dumps( + (transform.combine_fn, (), {}, ())), + phase=phase, + output_coders=[element_coder], + input=(producer_index, output_index)) self._run_as_op(transform_node, combine_op) def _get_coder(self, pvalue, windowed=True): diff --git a/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py b/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py index cd20cc17d9e4..473c71aa46e4 100644 --- a/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py @@ -35,9 +35,10 @@ def test_create(self): def 
test_pardo(self): with self.create_pipeline() as p: - res = (p | beam.Create(['a', 'bc']) - | beam.Map(lambda e: e * 2) - | beam.Map(lambda e: e + 'x')) + res = (p + | beam.Create(['a', 'bc']) + | beam.Map(lambda e: e * 2) + | beam.Map(lambda e: e + 'x')) assert_that(res, equal_to(['aax', 'bcbcx'])) def test_pardo_metrics(self): @@ -88,8 +89,9 @@ def tee(elem, *tags): if tag in elem: yield beam.pvalue.OutputValue(tag, elem) with self.create_pipeline() as p: - xy = (p | 'Create' >> beam.Create(['x', 'y', 'xy']) - | beam.FlatMap(tee, 'x', 'y').with_outputs()) + xy = (p + | 'Create' >> beam.Create(['x', 'y', 'xy']) + | beam.FlatMap(tee, 'x', 'y').with_outputs()) assert_that(xy.x, equal_to(['x', 'xy']), label='x') assert_that(xy.y, equal_to(['y', 'xy']), label='y') @@ -106,7 +108,7 @@ def even_odd(elem): assert_that(named.odd, equal_to([1, 3]), label='named.odd') unnamed = ints | 'unnamed' >> beam.FlatMap(even_odd).with_outputs() - unnamed[None] | beam.Map(id) + unnamed[None] | beam.Map(id) # pylint: disable=expression-not-assigned assert_that(unnamed[None], equal_to([1, 2, 3]), label='unnamed.all') assert_that(unnamed.even, equal_to([2]), label='unnamed.even') assert_that(unnamed.odd, equal_to([1, 3]), label='unnamed.odd') @@ -143,9 +145,10 @@ def cross_product(elem, sides): def test_group_by_key(self): with self.create_pipeline() as p: - res = (p | beam.Create([('a', 1), ('a', 2), ('b', 3)]) - | beam.GroupByKey() - | beam.Map(lambda (k, vs): (k, sorted(vs)))) + res = (p + | beam.Create([('a', 1), ('a', 2), ('b', 3)]) + | beam.GroupByKey() + | beam.Map(lambda (k, vs): (k, sorted(vs)))) assert_that(res, equal_to([('a', [1, 2]), ('b', [3])])) def test_flatten(self): @@ -157,8 +160,9 @@ def test_flatten(self): def test_combine_per_key(self): with self.create_pipeline() as p: - res = (p | beam.Create([('a', 1), ('a', 2), ('b', 3)]) - | beam.CombinePerKey(beam.combiners.MeanCombineFn())) + res = (p + | beam.Create([('a', 1), ('a', 2), ('b', 3)]) + | beam.CombinePerKey(beam.combiners.MeanCombineFn())) assert_that(res, equal_to([('a', 1.5), ('b', 3.0)])) def test_read(self): @@ -171,11 +175,12 @@ def test_read(self): def test_windowing(self): with self.create_pipeline() as p: - res = (p | beam.Create([1, 2, 100, 101, 102]) - | beam.Map(lambda t: TimestampedValue(('k', t), t)) - | beam.WindowInto(beam.transforms.window.Sessions(10)) - | beam.GroupByKey() - | beam.Map(lambda (k, vs): (k, sorted(vs)))) + res = (p + | beam.Create([1, 2, 100, 101, 102]) + | beam.Map(lambda t: TimestampedValue(('k', t), t)) + | beam.WindowInto(beam.transforms.window.Sessions(10)) + | beam.GroupByKey() + | beam.Map(lambda (k, vs): (k, sorted(vs)))) assert_that(res, equal_to([('k', [1, 2]), ('k', [100, 101, 102])])) if __name__ == '__main__': diff --git a/sdks/python/apache_beam/runners/worker/operation_specs.py b/sdks/python/apache_beam/runners/worker/operation_specs.py index 5b2908f964b2..8eb1bf6df476 100644 --- a/sdks/python/apache_beam/runners/worker/operation_specs.py +++ b/sdks/python/apache_beam/runners/worker/operation_specs.py @@ -325,7 +325,7 @@ def get_coder_from_spec(coder_spec): return coders.WindowedValueCoder(value_coder, window_coder=window_coder) elif coder_spec['@type'] == 'kind:global_window': assert ('component_encodings' not in coder_spec - or len(coder_spec['component_encodings'] == 0)) + or not coder_spec['component_encodings']) return coders.GlobalWindowCoder() elif coder_spec['@type'] == 'kind:length_prefix': assert len(coder_spec['component_encodings']) == 1 diff --git 
a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py index bd39c5eb1b53..63928aa0bddb 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py @@ -19,7 +19,6 @@ from __future__ import print_function import collections -import contextlib import json import logging import Queue as queue diff --git a/sdks/python/apache_beam/runners/worker/statesampler_test.py b/sdks/python/apache_beam/runners/worker/statesampler_test.py index 3331c5c5c5d7..0e63f07a2b85 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler_test.py +++ b/sdks/python/apache_beam/runners/worker/statesampler_test.py @@ -93,4 +93,3 @@ def test_sampler_transition_overhead(self): if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) unittest.main() - From 010f5e12372c87801e6f6306b70d8d7a1ef6adaa Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 24 Apr 2017 16:21:37 -0700 Subject: [PATCH 18/33] Update licenses. --- .../runners/portability/fn_api_runner.py | 17 +++++++++++++++++ .../runners/portability/fn_api_runner_test.py | 17 ++++++++++++++++- .../portability/maptask_executor_runner.py | 18 ++++++++++++++++++ .../maptask_executor_runner_test.py | 17 ++++++++++++++++- .../apache_beam/runners/worker/data_plane.py | 12 ++++++++---- .../runners/worker/data_plane_test.py | 12 ++++++++---- .../apache_beam/runners/worker/log_handler.py | 13 ++++++++----- .../runners/worker/log_handler_test.py | 13 +++++++++---- .../apache_beam/runners/worker/logger.py | 13 ++++++++----- .../apache_beam/runners/worker/logger_test.py | 13 ++++++++----- .../apache_beam/runners/worker/opcounters.py | 15 +++++++++------ .../runners/worker/opcounters_test.py | 15 ++++++++------- .../runners/worker/operation_specs.py | 13 ++++++++----- .../apache_beam/runners/worker/operations.py | 15 +++++++++------ .../apache_beam/runners/worker/sdk_worker.py | 13 ++++++++----- .../runners/worker/sdk_worker_main.py | 13 ++++++++----- .../runners/worker/sdk_worker_test.py | 13 ++++++++----- .../apache_beam/runners/worker/sideinputs.py | 15 +++++++++------ .../runners/worker/sideinputs_test.py | 13 ++++++++----- .../runners/worker/statesampler_fake.py | 1 - .../runners/worker/statesampler_test.py | 13 ++++++++----- 21 files changed, 199 insertions(+), 85 deletions(-) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py index 5a68c4e473eb..f75af090330f 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py @@ -1,3 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + """A PipelineRunner using the SDK harness. 
""" import collections diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py index 10d0d288a94e..fdb70576a158 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py @@ -1,4 +1,19 @@ -"""Tests for google.cloud.dataflow.worker.worker_runner.""" +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# import logging import unittest diff --git a/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py b/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py index ec42bf5320b1..e5c62df44cab 100644 --- a/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py +++ b/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py @@ -1,5 +1,23 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + """Beam runner for testing/profiling worker code directly. """ + import collections import logging import time diff --git a/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py b/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py index 473c71aa46e4..c01c082fd628 100644 --- a/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py @@ -1,4 +1,19 @@ -"""Tests for google.cloud.dataflow.worker.worker_runner.""" +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# import logging import tempfile diff --git a/sdks/python/apache_beam/runners/worker/data_plane.py b/sdks/python/apache_beam/runners/worker/data_plane.py index 37e6369a2ee2..6425447994f6 100644 --- a/sdks/python/apache_beam/runners/worker/data_plane.py +++ b/sdks/python/apache_beam/runners/worker/data_plane.py @@ -1,15 +1,19 @@ # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# """Implementation of DataChannels for communicating across the data plane.""" diff --git a/sdks/python/apache_beam/runners/worker/data_plane_test.py b/sdks/python/apache_beam/runners/worker/data_plane_test.py index 571af9ef7819..5ba19c2584e3 100644 --- a/sdks/python/apache_beam/runners/worker/data_plane_test.py +++ b/sdks/python/apache_beam/runners/worker/data_plane_test.py @@ -1,15 +1,19 @@ # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# """Tests for apache_beam.runners.worker.data_plane.""" diff --git a/sdks/python/apache_beam/runners/worker/log_handler.py b/sdks/python/apache_beam/runners/worker/log_handler.py index 8654628a5cb0..c68b3c58764a 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler.py +++ b/sdks/python/apache_beam/runners/worker/log_handler.py @@ -1,16 +1,19 @@ # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - +# """Beam fn API log handler.""" import logging diff --git a/sdks/python/apache_beam/runners/worker/log_handler_test.py b/sdks/python/apache_beam/runners/worker/log_handler_test.py index cc6982084d84..701e755c1693 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler_test.py +++ b/sdks/python/apache_beam/runners/worker/log_handler_test.py @@ -1,15 +1,20 @@ # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# + import logging import unittest diff --git a/sdks/python/apache_beam/runners/worker/logger.py b/sdks/python/apache_beam/runners/worker/logger.py index 0a18078a2c99..728860abd488 100644 --- a/sdks/python/apache_beam/runners/worker/logger.py +++ b/sdks/python/apache_beam/runners/worker/logger.py @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. 
+# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# """Python Dataflow worker logging.""" diff --git a/sdks/python/apache_beam/runners/worker/logger_test.py b/sdks/python/apache_beam/runners/worker/logger_test.py index 2d90a1716136..fb1fb296163d 100644 --- a/sdks/python/apache_beam/runners/worker/logger_test.py +++ b/sdks/python/apache_beam/runners/worker/logger_test.py @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# """Tests for worker logging utilities.""" diff --git a/sdks/python/apache_beam/runners/worker/opcounters.py b/sdks/python/apache_beam/runners/worker/opcounters.py index 1b48b1306308..56ce0db6c8be 100644 --- a/sdks/python/apache_beam/runners/worker/opcounters.py +++ b/sdks/python/apache_beam/runners/worker/opcounters.py @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, +# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+# # cython: profile=True diff --git a/sdks/python/apache_beam/runners/worker/opcounters_test.py b/sdks/python/apache_beam/runners/worker/opcounters_test.py index 34965238cd1a..6356bbbbf571 100644 --- a/sdks/python/apache_beam/runners/worker/opcounters_test.py +++ b/sdks/python/apache_beam/runners/worker/opcounters_test.py @@ -1,18 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - -"""Tests for worker counters.""" +# import logging import math diff --git a/sdks/python/apache_beam/runners/worker/operation_specs.py b/sdks/python/apache_beam/runners/worker/operation_specs.py index 8eb1bf6df476..81e5d056e491 100644 --- a/sdks/python/apache_beam/runners/worker/operation_specs.py +++ b/sdks/python/apache_beam/runners/worker/operation_specs.py @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# """Worker utilities for representing MapTasks. diff --git a/sdks/python/apache_beam/runners/worker/operations.py b/sdks/python/apache_beam/runners/worker/operations.py index da4adce32d86..79fd58b3fd41 100644 --- a/sdks/python/apache_beam/runners/worker/operations.py +++ b/sdks/python/apache_beam/runners/worker/operations.py @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, +# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# # cython: profile=True diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker.py b/sdks/python/apache_beam/runners/worker/sdk_worker.py index 63928aa0bddb..6907f6edc92d 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker.py @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# """SDK harness for executing Python Fns via the Fn API.""" diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_main.py b/sdks/python/apache_beam/runners/worker/sdk_worker_main.py index 12870ac03388..28828c3b4925 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker_main.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker_main.py @@ -1,16 +1,19 @@ -# Copyright 2017 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. +# """SDK Fn Harness entry point.""" diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py index e7493ba5ef65..1fac82dfa887 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# """Tests for apache_beam.runners.worker.sdk_worker.""" diff --git a/sdks/python/apache_beam/runners/worker/sideinputs.py b/sdks/python/apache_beam/runners/worker/sideinputs.py index 76981669cb56..3bac3d9a2e4b 100644 --- a/sdks/python/apache_beam/runners/worker/sideinputs.py +++ b/sdks/python/apache_beam/runners/worker/sideinputs.py @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, +# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# """Utilities for handling side inputs.""" diff --git a/sdks/python/apache_beam/runners/worker/sideinputs_test.py b/sdks/python/apache_beam/runners/worker/sideinputs_test.py index 53cfa87deb7d..d243bbe4e6ee 100644 --- a/sdks/python/apache_beam/runners/worker/sideinputs_test.py +++ b/sdks/python/apache_beam/runners/worker/sideinputs_test.py @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# """Tests for side input utilities.""" diff --git a/sdks/python/apache_beam/runners/worker/statesampler_fake.py b/sdks/python/apache_beam/runners/worker/statesampler_fake.py index efd7f2db8fcd..c88d6991bdd7 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler_fake.py +++ b/sdks/python/apache_beam/runners/worker/statesampler_fake.py @@ -15,7 +15,6 @@ # limitations under the License. # - class StateSampler(object): def __init__(self, *args, **kwargs): diff --git a/sdks/python/apache_beam/runners/worker/statesampler_test.py b/sdks/python/apache_beam/runners/worker/statesampler_test.py index 0e63f07a2b85..0df0609333df 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler_test.py +++ b/sdks/python/apache_beam/runners/worker/statesampler_test.py @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# """Tests for state sampler.""" From af744aa08b701ce577a2d425614b75f303862b5d Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 24 Apr 2017 16:26:49 -0700 Subject: [PATCH 19/33] Reviewer comments. 
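Cleanups from review. The runner was wrapping self._next_uid() in
'%s' % ..., which is redundant when the uid helper already returns a
string; the fn API log handler was also mapping logging.WARNING to
LogEntry.ERROR, silently promoting warnings to errors. A sketch of
the kind of uid helper for which the wrapping is a no-op (hypothetical
names; the real counter lives on the runner class)::

    class UidSource(object):
        """Illustrative only: uids are generated as strings."""

        def __init__(self):
            self._last_uid = 0

        def _next_uid(self):
            # Already a str, so "'%s' % self._next_uid()" adds nothing.
            self._last_uid += 1
            return str(self._last_uid)
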
--- sdks/python/apache_beam/runners/portability/fn_api_runner.py | 4 ++-- .../apache_beam/runners/portability/fn_api_runner_test.py | 2 +- sdks/python/apache_beam/runners/worker/log_handler.py | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py index f75af090330f..71092d166d2f 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py @@ -188,7 +188,7 @@ def outputs(op): # concatenate them together for element in operation.source.source.read(None): element_coder.encode_to_stream(element, output_stream, True) - target_name = '%s' % self._next_uid() + target_name = self._next_uid() input_data[(transform_id, target_name)] = output_stream.get() fn = beam_fn_api_pb2.FunctionSpec(urn=sdk_worker.DATA_INPUT_URN, id=self._next_uid()) @@ -205,7 +205,7 @@ def outputs(op): GlobalWindows.windowed_value(operation.source), output_stream, True) - target_name = '%s' % self._next_uid() + target_name = self._next_uid() input_data[(input_transform_id, target_name)] = output_stream.get() input_ptransform = beam_fn_api_pb2.PrimitiveTransform( id=input_transform_id, diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py b/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py index fdb70576a158..633602f2a9ac 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner_test.py @@ -32,7 +32,7 @@ def test_combine_per_key(self): # TODO(robertwb): Implement PGBKCV operation. pass - # Inherits all tests from worker_runner_base_test.WorkerRunnerBaseTest + # Inherits all tests from maptask_executor_runner.MapTaskExecutorRunner if __name__ == '__main__': diff --git a/sdks/python/apache_beam/runners/worker/log_handler.py b/sdks/python/apache_beam/runners/worker/log_handler.py index c68b3c58764a..d9ec7409b4c1 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler.py +++ b/sdks/python/apache_beam/runners/worker/log_handler.py @@ -37,7 +37,7 @@ class FnApiLogRecordHandler(logging.Handler): LOG_LEVEL_MAP = { logging.FATAL: beam_fn_api_pb2.LogEntry.CRITICAL, logging.ERROR: beam_fn_api_pb2.LogEntry.ERROR, - logging.WARNING: beam_fn_api_pb2.LogEntry.ERROR, + logging.WARNING: beam_fn_api_pb2.LogEntry.WARNING, logging.INFO: beam_fn_api_pb2.LogEntry.INFO, logging.DEBUG: beam_fn_api_pb2.LogEntry.DEBUG } From c72405e1f2afa1a6bbbea80ea917c86759faabcf Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 24 Apr 2017 16:35:19 -0700 Subject: [PATCH 20/33] Proto doesn't have WARNING, but it does have WARN. 
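The previous commit switched the logging.WARNING entry to
LogEntry.WARNING, but the severity enum generated in beam_fn_api_pb2
only defines WARN, so building LOG_LEVEL_MAP raises AttributeError as
soon as log_handler is imported. Map to WARN instead. If the enum
names drift again, a defensive construction along these lines would
degrade to INFO rather than fail at import time (a sketch only; the
handler keeps a plain dict in this patch)::

    import logging

    def build_log_level_map(log_entry):
        # Fall back to INFO for any Python level the proto enum lacks.
        return {
            logging.FATAL: log_entry.CRITICAL,
            logging.ERROR: log_entry.ERROR,
            logging.WARNING: getattr(log_entry, 'WARN', log_entry.INFO),
            logging.INFO: log_entry.INFO,
            logging.DEBUG: log_entry.DEBUG,
        }
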
--- sdks/python/apache_beam/runners/worker/log_handler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sdks/python/apache_beam/runners/worker/log_handler.py b/sdks/python/apache_beam/runners/worker/log_handler.py index d9ec7409b4c1..b9e36ad2c312 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler.py +++ b/sdks/python/apache_beam/runners/worker/log_handler.py @@ -37,7 +37,7 @@ class FnApiLogRecordHandler(logging.Handler): LOG_LEVEL_MAP = { logging.FATAL: beam_fn_api_pb2.LogEntry.CRITICAL, logging.ERROR: beam_fn_api_pb2.LogEntry.ERROR, - logging.WARNING: beam_fn_api_pb2.LogEntry.WARNING, + logging.WARNING: beam_fn_api_pb2.LogEntry.WARN, logging.INFO: beam_fn_api_pb2.LogEntry.INFO, logging.DEBUG: beam_fn_api_pb2.LogEntry.DEBUG } From f72770e81fbb0927793ec05a5228a240fc0c95df Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 24 Apr 2017 16:44:50 -0700 Subject: [PATCH 21/33] one more lint error --- sdks/python/apache_beam/runners/worker/statesampler_fake.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdks/python/apache_beam/runners/worker/statesampler_fake.py b/sdks/python/apache_beam/runners/worker/statesampler_fake.py index c88d6991bdd7..efd7f2db8fcd 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler_fake.py +++ b/sdks/python/apache_beam/runners/worker/statesampler_fake.py @@ -15,6 +15,7 @@ # limitations under the License. # + class StateSampler(object): def __init__(self, *args, **kwargs): From d7022dd66933f83a50301a8b04ba6eed4a136837 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Tue, 25 Apr 2017 13:02:57 -0700 Subject: [PATCH 22/33] move portpicker --- sdks/python/apache_beam/runners/portability/fn_api_runner.py | 2 +- sdks/python/apache_beam/runners/worker/data_plane_test.py | 2 +- sdks/python/apache_beam/runners/worker/log_handler_test.py | 2 +- sdks/python/apache_beam/{utils => runners/worker}/portpicker.py | 0 sdks/python/apache_beam/runners/worker/sdk_worker_test.py | 2 +- 5 files changed, 4 insertions(+), 4 deletions(-) rename sdks/python/apache_beam/{utils => runners/worker}/portpicker.py (100%) diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py index 71092d166d2f..f788470677d3 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py @@ -37,8 +37,8 @@ from apache_beam.runners.portability import maptask_executor_runner from apache_beam.runners.worker import data_plane from apache_beam.runners.worker import operation_specs +from apache_beam.runners.worker import portpicker from apache_beam.runners.worker import sdk_worker -from apache_beam.utils import portpicker def streaming_rpc_handler(cls, method_name): diff --git a/sdks/python/apache_beam/runners/worker/data_plane_test.py b/sdks/python/apache_beam/runners/worker/data_plane_test.py index 5ba19c2584e3..c7fcb81bfe73 100644 --- a/sdks/python/apache_beam/runners/worker/data_plane_test.py +++ b/sdks/python/apache_beam/runners/worker/data_plane_test.py @@ -31,7 +31,7 @@ from apache_beam.runners.api import beam_fn_api_pb2 from apache_beam.runners.worker import data_plane -from apache_beam.utils import portpicker +from apache_beam.runners.worker import portpicker def timeout(timeout_secs): diff --git a/sdks/python/apache_beam/runners/worker/log_handler_test.py b/sdks/python/apache_beam/runners/worker/log_handler_test.py index 701e755c1693..13656f243d0d 100644 --- 
a/sdks/python/apache_beam/runners/worker/log_handler_test.py +++ b/sdks/python/apache_beam/runners/worker/log_handler_test.py @@ -24,7 +24,7 @@ from apache_beam.runners.api import beam_fn_api_pb2 from apache_beam.runners.worker import log_handler -from apache_beam.utils import portpicker +from apache_beam.runners.worker import portpicker class BeamFnLoggingServicer(beam_fn_api_pb2.BeamFnLoggingServicer): diff --git a/sdks/python/apache_beam/utils/portpicker.py b/sdks/python/apache_beam/runners/worker/portpicker.py similarity index 100% rename from sdks/python/apache_beam/utils/portpicker.py rename to sdks/python/apache_beam/runners/worker/portpicker.py diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py index 1fac82dfa887..cc138b3823bf 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py @@ -32,7 +32,7 @@ from apache_beam.runners.api import beam_fn_api_pb2 from apache_beam.runners.worker import sdk_worker from apache_beam.runners.worker.data_plane import GrpcClientDataChannelFactory -from apache_beam.utils import portpicker +from apache_beam.runners.worker import portpicker class BeamFnControlServicer(beam_fn_api_pb2.BeamFnControlServicer): From 37d463f0cfd9c430cd94a8ea33c0571a3cd66bfb Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Tue, 25 Apr 2017 13:15:56 -0700 Subject: [PATCH 23/33] More license fixes. --- sdks/python/apache_beam/runners/worker/logger.pxd | 13 ++++++++----- .../apache_beam/runners/worker/opcounters.pxd | 15 +++++++++------ .../apache_beam/runners/worker/operations.pxd | 15 +++++++++------ .../apache_beam/runners/worker/statesampler.pyx | 13 ++++++++----- 4 files changed, 34 insertions(+), 22 deletions(-) diff --git a/sdks/python/apache_beam/runners/worker/logger.pxd b/sdks/python/apache_beam/runners/worker/logger.pxd index 7667338cf9aa..201daf4e29ae 100644 --- a/sdks/python/apache_beam/runners/worker/logger.pxd +++ b/sdks/python/apache_beam/runners/worker/logger.pxd @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# cimport cython diff --git a/sdks/python/apache_beam/runners/worker/opcounters.pxd b/sdks/python/apache_beam/runners/worker/opcounters.pxd index 57382054d87b..5c1079f64652 100644 --- a/sdks/python/apache_beam/runners/worker/opcounters.pxd +++ b/sdks/python/apache_beam/runners/worker/opcounters.pxd @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. 
# -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, +# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# cimport cython cimport libc.stdint diff --git a/sdks/python/apache_beam/runners/worker/operations.pxd b/sdks/python/apache_beam/runners/worker/operations.pxd index eee75a692b64..2b4e52616166 100644 --- a/sdks/python/apache_beam/runners/worker/operations.pxd +++ b/sdks/python/apache_beam/runners/worker/operations.pxd @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, +# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# cimport cython diff --git a/sdks/python/apache_beam/runners/worker/statesampler.pyx b/sdks/python/apache_beam/runners/worker/statesampler.pyx index a311078b2f5f..b320cbf5911f 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler.pyx +++ b/sdks/python/apache_beam/runners/worker/statesampler.pyx @@ -1,16 +1,19 @@ -# Copyright 2016 Google Inc. All Rights Reserved. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. 
You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +# # cython: profile=True From 4f401586cd35dcf7933a57cb74d21f0d62062e20 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Tue, 25 Apr 2017 13:20:55 -0700 Subject: [PATCH 24/33] More reviewer comments. --- .../apache_beam/runners/worker/logger.py | 2 +- .../runners/worker/operation_specs.py | 6 ++---- .../apache_beam/runners/worker/operations.py | 2 +- .../runners/worker/sdk_worker_test.py | 6 +++--- .../runners/worker/statesampler.pyx | 2 +- .../runners/worker/statesampler_test.py | 19 +++++++++++-------- 6 files changed, 19 insertions(+), 18 deletions(-) diff --git a/sdks/python/apache_beam/runners/worker/logger.py b/sdks/python/apache_beam/runners/worker/logger.py index 728860abd488..6d68e066d2c1 100644 --- a/sdks/python/apache_beam/runners/worker/logger.py +++ b/sdks/python/apache_beam/runners/worker/logger.py @@ -15,7 +15,7 @@ # limitations under the License. # -"""Python Dataflow worker logging.""" +"""Python worker logging.""" import json import logging diff --git a/sdks/python/apache_beam/runners/worker/operation_specs.py b/sdks/python/apache_beam/runners/worker/operation_specs.py index 81e5d056e491..159156ff7da4 100644 --- a/sdks/python/apache_beam/runners/worker/operation_specs.py +++ b/sdks/python/apache_beam/runners/worker/operation_specs.py @@ -230,7 +230,7 @@ def worker_object_to_string(worker_object): combine_fn: A serialized CombineFn object to be used after executing the GroupAlsoByWindows operation. May be None if not a combining operation. phase: Possible values are 'all', 'add', 'merge', and 'extract'. - The dataflow optimizer may split the user combiner in 3 separate + A runner optimizer may split the user combiner in 3 separate phases (ADD, MERGE, and EXTRACT), on separate VMs, as it sees fit. The phase attribute dictates which DoFn is actually running in the worker. May be None if not a combining operation. @@ -253,7 +253,7 @@ def worker_object_to_string(worker_object): Attributes: serialized_fn: A serialized CombineFn object to be used. phase: Possible values are 'all', 'add', 'merge', and 'extract'. - The dataflow optimizer may split the user combiner in 3 separate + A runner optimizer may split the user combiner in 3 separate phases (ADD, MERGE, and EXTRACT), on separate VMs, as it sees fit. The phase attribute dictates which DoFn is actually running in the worker. @@ -304,8 +304,6 @@ def get_coder_from_spec(coder_spec): # Ignore the wrappers in these encodings. # TODO(silviuc): Make sure with all the renamings that names below are ok. 
- ignored_wrappers = ( - 'com.google.cloud.dataflow.sdk.util.TimerOrElement$TimerOrElementCoder') if coder_spec['@type'] in ignored_wrappers: assert len(coder_spec['component_encodings']) == 1 coder_spec = coder_spec['component_encodings'][0] diff --git a/sdks/python/apache_beam/runners/worker/operations.py b/sdks/python/apache_beam/runners/worker/operations.py index 79fd58b3fd41..5dbe57e77b49 100644 --- a/sdks/python/apache_beam/runners/worker/operations.py +++ b/sdks/python/apache_beam/runners/worker/operations.py @@ -109,7 +109,7 @@ def __init__(self, operation_name, spec, counter_factory, state_sampler): """Initializes a worker operation instance. Args: - operation_name: The system name assigned by the Dataflow service for this + operation_name: The system name assigned by the runner for this operation. spec: A operation_specs.Worker* instance. counter_factory: The CounterFactory to use for our counters. diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py index cc138b3823bf..7c6aa11e9627 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py @@ -30,8 +30,8 @@ from apache_beam.io.concat_source_test import RangeSource from apache_beam.io.iobase import SourceBundle from apache_beam.runners.api import beam_fn_api_pb2 +from apache_beam.runners.worker import data_plane from apache_beam.runners.worker import sdk_worker -from apache_beam.runners.worker.data_plane import GrpcClientDataChannelFactory from apache_beam.runners.worker import portpicker @@ -65,7 +65,6 @@ def Control(self, response_iterator, context): class SdkWorkerTest(unittest.TestCase): def test_fn_registration(self): - fns = [beam_fn_api_pb2.FunctionSpec(id=str(ix)) for ix in range(4)] test_port = portpicker.pick_unused_port() @@ -96,7 +95,8 @@ def test_source_split(self): source = RangeSource(0, 100) expected_splits = list(source.split(30)) - worker = sdk_harness.SdkWorker(None, GrpcClientDataChannelFactory()) + worker = sdk_harness.SdkWorker( + None, data_plane.GrpcClientDataChannelFactory()) worker.register( beam_fn_api_pb2.RegisterRequest( process_bundle_descriptor=[beam_fn_api_pb2.ProcessBundleDescriptor( diff --git a/sdks/python/apache_beam/runners/worker/statesampler.pyx b/sdks/python/apache_beam/runners/worker/statesampler.pyx index b320cbf5911f..3ff6c20aa690 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler.pyx +++ b/sdks/python/apache_beam/runners/worker/statesampler.pyx @@ -19,7 +19,7 @@ """State sampler for tracking time spent in execution steps. -The state sampler profiles the time spent in each step of a Dataflow pipeline. +The state sampler profiles the time spent in each step of a pipeline. Operations (defined in executor.py) which are executed as part of a MapTask are instrumented with context managers provided by StateSampler.scoped_state(). 
These context managers change the internal state of the StateSampler during each diff --git a/sdks/python/apache_beam/runners/worker/statesampler_test.py b/sdks/python/apache_beam/runners/worker/statesampler_test.py index 0df0609333df..fac929f468a4 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler_test.py +++ b/sdks/python/apache_beam/runners/worker/statesampler_test.py @@ -21,11 +21,21 @@ import time import unittest +from nose.plugins.skip import SkipTest + from apache_beam.utils.counters import CounterFactory class StateSamplerTest(unittest.TestCase): + def setUp(self): + try: + global statesampler + import statesampler + except ImportError: + raise SkipTest('State sampler not compiled.') + super(StateSamplerTest, self).setUp() + def test_basic_sampler(self): # Set up state sampler. counter_factory = CounterFactory() @@ -44,7 +54,7 @@ def test_basic_sampler(self): sampler.stop() sampler.commit_counters() - # Test that sampled state timings are within 5% of expected values. + # Test that sampled state timings are close to their expected values. expected_counter_values = { 'basic-statea-msecs': 100, 'basic-stateb-msecs': 200, @@ -86,13 +96,6 @@ def test_sampler_transition_overhead(self): self.assertLess(overhead_us, 10.0) -try: - from apache_beam.runners.worker import statesampler -except ImportError: - # No compiler. - del StateSamplerTest - - if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) unittest.main() From 87819557d0b2d655746060e2c2ed0eb970542b42 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Tue, 25 Apr 2017 16:34:27 -0500 Subject: [PATCH 25/33] Elide compiled statesampler from docs. --- sdks/python/generate_pydoc.sh | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sdks/python/generate_pydoc.sh b/sdks/python/generate_pydoc.sh index b04e27a9b9a5..6039942e7eda 100755 --- a/sdks/python/generate_pydoc.sh +++ b/sdks/python/generate_pydoc.sh @@ -44,6 +44,8 @@ python $(type -p sphinx-apidoc) -f -o target/docs/source apache_beam \ # Remove Cython modules from doc template; they won't load sed -i -e '/.. automodule:: apache_beam.coders.stream/d' \ target/docs/source/apache_beam.coders.rst +sed -i -e '/.. automodule:: apache_beam.runners.worker.statesampler/d' \ + target/docs/source/apache_beam.runners.worker.rst # Create the configuration and index files cat > target/docs/source/conf.py <<'EOF' From f92bcea7a6f39749cb20b1e30ec47e0c67e17c59 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Wed, 26 Apr 2017 16:12:14 -0700 Subject: [PATCH 26/33] Remove unneeded portpicker. 
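
grpc.Server.add_insecure_port returns the port that was actually bound, so
binding to port 0 lets the gRPC runtime pick a free port and report it back
in one step, with no pick-then-bind race and no portpicker dependency. A
minimal sketch of the pattern the tests now follow (Python 2, assuming the
concurrent.futures backport already used elsewhere in these tests):

    import grpc
    from concurrent import futures

    server = grpc.server(futures.ThreadPoolExecutor(max_workers=2))
    # '[::]:0' asks the OS for any free port; the return value is the
    # port that was actually chosen, ready to hand to a client channel.
    port = server.add_insecure_port('[::]:0')
    server.start()
    channel = grpc.insecure_channel('localhost:%s' % port)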
--- .../runners/portability/fn_api_runner.py | 7 ++-- .../runners/worker/data_plane_test.py | 4 +-- .../runners/worker/log_handler_test.py | 4 +-- .../apache_beam/runners/worker/portpicker.py | 32 ------------------- .../runners/worker/sdk_worker_test.py | 8 ++--- 5 files changed, 6 insertions(+), 49 deletions(-) delete mode 100644 sdks/python/apache_beam/runners/worker/portpicker.py diff --git a/sdks/python/apache_beam/runners/portability/fn_api_runner.py b/sdks/python/apache_beam/runners/portability/fn_api_runner.py index f788470677d3..5802c17f4276 100644 --- a/sdks/python/apache_beam/runners/portability/fn_api_runner.py +++ b/sdks/python/apache_beam/runners/portability/fn_api_runner.py @@ -37,7 +37,6 @@ from apache_beam.runners.portability import maptask_executor_runner from apache_beam.runners.worker import data_plane from apache_beam.runners.worker import operation_specs -from apache_beam.runners.worker import portpicker from apache_beam.runners.worker import sdk_worker @@ -433,12 +432,10 @@ def __init__(self): self.state_handler = FnApiRunner.SimpleState() self.control_server = grpc.server( futures.ThreadPoolExecutor(max_workers=10)) - self.control_port = portpicker.pick_unused_port() - self.control_server.add_insecure_port('[::]:%s' % self.control_port) + self.control_port = self.control_server.add_insecure_port('[::]:0') self.data_server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) - self.data_port = portpicker.pick_unused_port() - self.data_server.add_insecure_port('[::]:%s' % self.data_port) + self.data_port = self.data_server.add_insecure_port('[::]:0') self.control_handler = streaming_rpc_handler( beam_fn_api_pb2.BeamFnControlServicer, 'Control') diff --git a/sdks/python/apache_beam/runners/worker/data_plane_test.py b/sdks/python/apache_beam/runners/worker/data_plane_test.py index c7fcb81bfe73..7340789b88d9 100644 --- a/sdks/python/apache_beam/runners/worker/data_plane_test.py +++ b/sdks/python/apache_beam/runners/worker/data_plane_test.py @@ -31,7 +31,6 @@ from apache_beam.runners.api import beam_fn_api_pb2 from apache_beam.runners.worker import data_plane -from apache_beam.runners.worker import portpicker def timeout(timeout_secs): @@ -62,11 +61,10 @@ class DataChannelTest(unittest.TestCase): def test_grpc_data_channel(self): data_channel_service = data_plane.GrpcServerDataChannel() - test_port = portpicker.pick_unused_port() server = grpc.server(futures.ThreadPoolExecutor(max_workers=2)) beam_fn_api_pb2.add_BeamFnDataServicer_to_server( data_channel_service, server) - server.add_insecure_port('[::]:%s' % test_port) + test_port = server.add_insecure_port('[::]:0') server.start() data_channel_stub = beam_fn_api_pb2.BeamFnDataStub( diff --git a/sdks/python/apache_beam/runners/worker/log_handler_test.py b/sdks/python/apache_beam/runners/worker/log_handler_test.py index 13656f243d0d..600af4521bc1 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler_test.py +++ b/sdks/python/apache_beam/runners/worker/log_handler_test.py @@ -24,7 +24,6 @@ from apache_beam.runners.api import beam_fn_api_pb2 from apache_beam.runners.worker import log_handler -from apache_beam.runners.worker import portpicker class BeamFnLoggingServicer(beam_fn_api_pb2.BeamFnLoggingServicer): @@ -44,11 +43,10 @@ class FnApiLogRecordHandlerTest(unittest.TestCase): def setUp(self): self.test_logging_service = BeamFnLoggingServicer() - self.test_port = portpicker.pick_unused_port() self.server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) 
beam_fn_api_pb2.add_BeamFnLoggingServicer_to_server( self.test_logging_service, self.server) - self.server.add_insecure_port('[::]:%s' % self.test_port) + self.test_port = self.server.add_insecure_port('[::]:0') self.server.start() self.logging_service_descriptor = beam_fn_api_pb2.ApiServiceDescriptor() diff --git a/sdks/python/apache_beam/runners/worker/portpicker.py b/sdks/python/apache_beam/runners/worker/portpicker.py deleted file mode 100644 index 0a5bd5147b34..000000000000 --- a/sdks/python/apache_beam/runners/worker/portpicker.py +++ /dev/null @@ -1,32 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -import contextlib -import socket - - -def pick_unused_port(): - """Attempts to return an unused port. - - There is a window where the port could get re-used after it is closed - but before the caller has a chance to bind it again, but that's the - best we can do for libraries expecting a port number. - """ - with contextlib.closing( - socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as s: - s.bind(('localhost', 0)) - return s.getsockname()[1] diff --git a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py index 7c6aa11e9627..996f44c7f4a0 100644 --- a/sdks/python/apache_beam/runners/worker/sdk_worker_test.py +++ b/sdks/python/apache_beam/runners/worker/sdk_worker_test.py @@ -32,7 +32,6 @@ from apache_beam.runners.api import beam_fn_api_pb2 from apache_beam.runners.worker import data_plane from apache_beam.runners.worker import sdk_worker -from apache_beam.runners.worker import portpicker class BeamFnControlServicer(beam_fn_api_pb2.BeamFnControlServicer): @@ -66,7 +65,6 @@ class SdkWorkerTest(unittest.TestCase): def test_fn_registration(self): fns = [beam_fn_api_pb2.FunctionSpec(id=str(ix)) for ix in range(4)] - test_port = portpicker.pick_unused_port() process_bundle_descriptors = [beam_fn_api_pb2.ProcessBundleDescriptor( id=str(100+ix), @@ -80,7 +78,7 @@ def test_fn_registration(self): server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) beam_fn_api_pb2.add_BeamFnControlServicer_to_server(test_controller, server) - server.add_insecure_port("[::]:%s" % test_port) + test_port = server.add_insecure_port("[::]:0") server.start() channel = grpc.insecure_channel("localhost:%s" % test_port) @@ -125,8 +123,6 @@ def test_source_split_via_instruction(self): source = RangeSource(0, 100) expected_splits = list(source.split(30)) - test_port = portpicker.pick_unused_port() - test_controller = BeamFnControlServicer([ beam_fn_api_pb2.InstructionRequest( instruction_id="register_request", @@ -147,7 +143,7 @@ def test_source_split_via_instruction(self): server = grpc.server(futures.ThreadPoolExecutor(max_workers=10)) 
beam_fn_api_pb2.add_BeamFnControlServicer_to_server(test_controller, server) - server.add_insecure_port("[::]:%s" % test_port) + test_port = server.add_insecure_port("[::]:0") server.start() channel = grpc.insecure_channel("localhost:%s" % test_port) From f2240275ed4b2b92be46602befea4d9b3ae306bc Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Thu, 27 Apr 2017 00:13:32 -0500 Subject: [PATCH 27/33] lint --- sdks/python/apache_beam/runners/worker/statesampler_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdks/python/apache_beam/runners/worker/statesampler_test.py b/sdks/python/apache_beam/runners/worker/statesampler_test.py index fac929f468a4..663cdecdab0c 100644 --- a/sdks/python/apache_beam/runners/worker/statesampler_test.py +++ b/sdks/python/apache_beam/runners/worker/statesampler_test.py @@ -30,6 +30,7 @@ class StateSamplerTest(unittest.TestCase): def setUp(self): try: + # pylint: disable=global-variable-not-assigned global statesampler import statesampler except ImportError: From a8c6d7a027ad039b77d89525a9018a58cefd73b9 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 1 May 2017 12:42:54 -0700 Subject: [PATCH 28/33] Apache license for empty __init__.py files. --- .../apache_beam/runners/portability/__init__.py | 16 ++++++++++++++++ .../apache_beam/runners/worker/__init__.py | 16 ++++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/sdks/python/apache_beam/runners/portability/__init__.py b/sdks/python/apache_beam/runners/portability/__init__.py index e69de29bb2d1..cce3acad34a4 100644 --- a/sdks/python/apache_beam/runners/portability/__init__.py +++ b/sdks/python/apache_beam/runners/portability/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# diff --git a/sdks/python/apache_beam/runners/worker/__init__.py b/sdks/python/apache_beam/runners/worker/__init__.py index e69de29bb2d1..cce3acad34a4 100644 --- a/sdks/python/apache_beam/runners/worker/__init__.py +++ b/sdks/python/apache_beam/runners/worker/__init__.py @@ -0,0 +1,16 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#

From b33a189951eed33001d96ecfac4f1697d758310d Mon Sep 17 00:00:00 2001
From: Robert Bradshaw
Date: Mon, 1 May 2017 13:13:26 -0700
Subject: [PATCH 29/33] add interval window coder spec

---
 sdks/python/apache_beam/runners/worker/operation_specs.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/sdks/python/apache_beam/runners/worker/operation_specs.py b/sdks/python/apache_beam/runners/worker/operation_specs.py
index 159156ff7da4..167e48b6b561 100644
--- a/sdks/python/apache_beam/runners/worker/operation_specs.py
+++ b/sdks/python/apache_beam/runners/worker/operation_specs.py
@@ -324,6 +324,10 @@ def get_coder_from_spec(coder_spec):
     value_coder, window_coder = [
         get_coder_from_spec(c) for c in coder_spec['component_encodings']]
     return coders.WindowedValueCoder(value_coder, window_coder=window_coder)
+  elif coder_spec['@type'] == 'kind:interval_window':
+    assert ('component_encodings' not in coder_spec
+            or len(coder_spec['component_encodings']) == 0)
+    return coders.IntervalWindowCoder()
   elif coder_spec['@type'] == 'kind:global_window':
     assert ('component_encodings' not in coder_spec
             or not coder_spec['component_encodings'])

From 2d0b2025be65dc49937a5d76459c5c2f1a750684 Mon Sep 17 00:00:00 2001
From: Robert Bradshaw
Date: Mon, 1 May 2017 13:17:56 -0700
Subject: [PATCH 30/33] Counter name updates.

---
 .../runners/portability/maptask_executor_runner.py        | 3 ++-
 sdks/python/apache_beam/runners/worker/operation_specs.py | 7 +++++--
 2 files changed, 7 insertions(+), 3 deletions(-)

diff --git a/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py b/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py
index e5c62df44cab..d273e189e033 100644
--- a/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py
+++ b/sdks/python/apache_beam/runners/portability/maptask_executor_runner.py
@@ -129,7 +129,8 @@ def execute_map_tasks(self, ordered_map_tasks):
       state_sampler = statesampler.StateSampler('%s-' % ix, counter_factory)
       map_executor = operations.SimpleMapTaskExecutor(
           operation_specs.MapTask(
-              all_operations, 'S%02d' % ix, system_names, stage_names),
+              all_operations, 'S%02d' % ix,
+              system_names, stage_names, system_names),
           counter_factory, state_sampler)
       self.executors.append(map_executor)

diff --git a/sdks/python/apache_beam/runners/worker/operation_specs.py b/sdks/python/apache_beam/runners/worker/operation_specs.py
index 167e48b6b561..977e165afe7c 100644
--- a/sdks/python/apache_beam/runners/worker/operation_specs.py
+++ b/sdks/python/apache_beam/runners/worker/operation_specs.py
@@ -350,15 +350,18 @@ class MapTask(object):
     within the map task.
     stage_name: The name of this map task execution stage.
     system_names: The system names of the step corresponding to each map task
-      operation.
+      operation in the execution graph.
     step_names: The names of the step corresponding to each map task
       operation.
+    original_names: The internal names of the steps in the original workflow graph.
""" - def __init__(self, operations, stage_name, system_names, step_names): + def __init__( + self, operations, stage_name, system_names, step_names, original_names): self.operations = operations self.stage_name = stage_name self.system_names = system_names self.step_names = step_names + self.original_names = original_names def __str__(self): return '<%s %s steps=%s>' % (self.__class__.__name__, self.stage_name, From 38e184299116fd193f9b14e7a9a9e7181a4c1aad Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 1 May 2017 13:30:28 -0700 Subject: [PATCH 31/33] more lint --- sdks/python/apache_beam/runners/worker/log_handler_test.py | 1 + sdks/python/apache_beam/runners/worker/logger.py | 1 + sdks/python/apache_beam/runners/worker/logger_test.py | 1 + sdks/python/apache_beam/runners/worker/opcounters_test.py | 1 + 4 files changed, 4 insertions(+) diff --git a/sdks/python/apache_beam/runners/worker/log_handler_test.py b/sdks/python/apache_beam/runners/worker/log_handler_test.py index 600af4521bc1..565bedbd2385 100644 --- a/sdks/python/apache_beam/runners/worker/log_handler_test.py +++ b/sdks/python/apache_beam/runners/worker/log_handler_test.py @@ -84,6 +84,7 @@ def _verify_fn_log_handler(self, num_log_entries): self.assertEqual(num_received_log_entries, num_log_entries) + # Test cases. data = { 'one_batch': log_handler.FnApiLogRecordHandler._MAX_BATCH_SIZE - 47, diff --git a/sdks/python/apache_beam/runners/worker/logger.py b/sdks/python/apache_beam/runners/worker/logger.py index 6d68e066d2c1..217dc5835075 100644 --- a/sdks/python/apache_beam/runners/worker/logger.py +++ b/sdks/python/apache_beam/runners/worker/logger.py @@ -43,6 +43,7 @@ def get_data(self): all_data.update(datum) return all_data + per_thread_worker_data = _PerThreadWorkerData() diff --git a/sdks/python/apache_beam/runners/worker/logger_test.py b/sdks/python/apache_beam/runners/worker/logger_test.py index fb1fb296163d..cf3f69292826 100644 --- a/sdks/python/apache_beam/runners/worker/logger_test.py +++ b/sdks/python/apache_beam/runners/worker/logger_test.py @@ -176,6 +176,7 @@ def test_exception_record(self): self.assertNotEqual(exn_output.find('logger_test.py'), -1) self.assertEqual(log_output, self.SAMPLE_OUTPUT) + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) unittest.main() diff --git a/sdks/python/apache_beam/runners/worker/opcounters_test.py b/sdks/python/apache_beam/runners/worker/opcounters_test.py index 6356bbbbf571..74561b81593e 100644 --- a/sdks/python/apache_beam/runners/worker/opcounters_test.py +++ b/sdks/python/apache_beam/runners/worker/opcounters_test.py @@ -143,6 +143,7 @@ def test_should_sample(self): 10 * total_runs / i, buckets[i] / (10.0 * total_runs / i))) + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) unittest.main() From 5b4af981de0caff30ccdd36377975c59201c43b9 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 1 May 2017 15:12:17 -0700 Subject: [PATCH 32/33] Setuptools doesn't understand ** expansion.' 
' --- sdks/python/setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sdks/python/setup.py b/sdks/python/setup.py index 375358cfbeb2..f527362aaa07 100644 --- a/sdks/python/setup.py +++ b/sdks/python/setup.py @@ -121,7 +121,8 @@ def get_version(): author=PACKAGE_AUTHOR, author_email=PACKAGE_EMAIL, packages=setuptools.find_packages(), - package_data={'apache_beam': ['**/*.pyx', '**/*.pxd', 'tests/data/*']}, + package_data={'apache_beam': [ + '*/*.pyx', '*/*/*.pyx', '*/*.pxd', '*/*/*.pxd', 'tests/data/*']}, ext_modules=cythonize([ '**/*.pyx', 'apache_beam/coders/coder_impl.py', From d6d7f5f05091146ac97d04ef4b7f4c50bfe0c809 Mon Sep 17 00:00:00 2001 From: Robert Bradshaw Date: Mon, 1 May 2017 16:24:04 -0700 Subject: [PATCH 33/33] lint --- .../runners/portability/maptask_executor_runner_test.py | 1 + 1 file changed, 1 insertion(+) diff --git a/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py b/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py index c01c082fd628..6e13e73d572a 100644 --- a/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py +++ b/sdks/python/apache_beam/runners/portability/maptask_executor_runner_test.py @@ -198,6 +198,7 @@ def test_windowing(self): | beam.Map(lambda (k, vs): (k, sorted(vs)))) assert_that(res, equal_to([('k', [1, 2]), ('k', [100, 101, 102])])) + if __name__ == '__main__': logging.getLogger().setLevel(logging.INFO) unittest.main()
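
The setup.py change in PATCH 32 works around how package_data patterns are
expanded: setuptools hands each pattern to glob.glob, where '*' never crosses
a directory separator and '**' (with no recursive-glob support in this
toolchain) behaves exactly like '*'. A quick illustration of the difference,
assuming it is run from the sdks/python directory:

    import glob

    # '**' degrades to '*' and matches a single directory level only, so
    # deeply nested Cython sources are silently missed.
    shallow = glob.glob('apache_beam/**/*.pyx')

    # One '*' per directory level matches the nested files explicitly,
    # which is why the patch spells out '*/*.pyx' and '*/*/*.pyx'.
    nested = (glob.glob('apache_beam/*/*.pyx')
              + glob.glob('apache_beam/*/*/*.pyx'))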