4 changes: 2 additions & 2 deletions sdks/python/apache_beam/pipeline_test.py
@@ -520,11 +520,11 @@ def test_dir(self):
options = Breakfast()
self.assertEquals(
set(['from_dictionary', 'get_all_options', 'slices', 'style',
-'view_as', 'display_data']),
+'view_as', 'display_data', 'next']),
Contributor: This is fine, but was there a particular reason it was added?

Contributor Author (@Fematich, Jul 4, 2018):
This is because of the import "from builtins import object" in apache_beam/transforms/display.py.

This import adds an alias next = __next__ for Python 2 and Python 3 compatibility.

PipelineOptions (the class under test here) inherits from the HasDisplayData class defined in the display.py module.
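A minimal sketch of the mechanism (my illustration, assuming the future package that provides this builtins backport; MyOptions is a hypothetical stand-in for PipelineOptions):

```python
# Hypothetical example, not Beam code: on Python 2, future's
# builtins.object is a backport (newobject) that defines next(self)
# delegating to __next__, so 'next' shows up in dir() of any subclass.
from builtins import object  # future's newobject on Py2; plain object on Py3


class MyOptions(object):
    pass


print('next' in dir(MyOptions()))  # True on Python 2 with future installed
```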

set([attr for attr in dir(options) if not attr.startswith('_')]))
self.assertEquals(
set(['from_dictionary', 'get_all_options', 'style', 'view_as',
-'display_data']),
+'display_data', 'next']),
set([attr for attr in dir(options.view_as(Eggs))
if not attr.startswith('_')]))

2 changes: 2 additions & 0 deletions sdks/python/apache_beam/transforms/__init__.py
@@ -18,6 +18,8 @@
"""PTransform and descendants."""

# pylint: disable=wildcard-import
+from __future__ import absolute_import
+
from apache_beam.transforms import combiners
from apache_beam.transforms.core import *
from apache_beam.transforms.ptransform import *
3 changes: 3 additions & 0 deletions sdks/python/apache_beam/transforms/combiners.py
@@ -18,9 +18,12 @@
"""A library of basic combiner PTransform subclasses."""

from __future__ import absolute_import
+from __future__ import division

import operator
import random
+from builtins import object
+from builtins import zip

from past.builtins import long

5 changes: 4 additions & 1 deletion sdks/python/apache_beam/transforms/combiners_test.py
@@ -16,12 +16,15 @@
#

"""Unit tests for our libraries of combine PTransforms."""
+from __future__ import absolute_import
+from __future__ import division

import itertools
import random
import unittest

import hamcrest as hc
+from future.builtins import range

import apache_beam as beam
import apache_beam.transforms.combiners as combine
@@ -286,7 +289,7 @@ def match(actual):
def matcher():
def match(actual):
equal_to([1])([len(actual)])
-equal_to(pairs)(actual[0].iteritems())
+equal_to(pairs)(actual[0].items())
return match
assert_that(result, matcher())
pipeline.run()
29 changes: 19 additions & 10 deletions sdks/python/apache_beam/transforms/core.py
@@ -21,12 +21,15 @@

import copy
import inspect
import itertools
import random
import re
import types
+from builtins import map
+from builtins import object
+from builtins import range

-from six import string_types
+from future.builtins import filter
from past.builtins import unicode

from apache_beam import coders
from apache_beam import pvalue
@@ -82,7 +85,6 @@
'Impulse',
]


# Type variables
T = typehints.TypeVariable('T')
K = typehints.TypeVariable('K')
@@ -291,6 +293,9 @@ def __eq__(self, other):
return self.param_id == other.param_id
return False

+def __hash__(self):
+return hash(self.param_id)
+
def __repr__(self):
return self.param_id

@@ -698,7 +703,7 @@ def merge_accumulators(self, accumulators, *args, **kwargs):

class ReiterableNonEmptyAccumulators(object):
def __iter__(self):
-return itertools.ifilter(filter_fn, accumulators)
+return filter(filter_fn, accumulators)
Contributor Author: I think this might be a potential source of the performance loss, so I'll update this to use ifilter on Python 2.
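For reference, a sketch of the version-conditional pattern being considered (illustrative only; lazy_filter is a hypothetical name, not the actual Beam code):

```python
# Keep filtering lazy on both major Python versions.
import sys

if sys.version_info[0] < 3:
    from itertools import ifilter as lazy_filter  # lazy iterator on Python 2
else:
    lazy_filter = filter  # built-in filter() is already lazy on Python 3

print(list(lazy_filter(lambda x: x % 2 == 0, range(10))))  # [0, 2, 4, 6, 8]
```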

Contributor Author: @charlesccychen and @tvalentyn: is there more detailed info on the benchmarks?

Contributor: Thank you. Let me test the pipeline with this change. Unfortunately it's not easy to export the benchmark data.


# It's (weakly) assumed that self._fn is associative.
return self._fn(ReiterableNonEmptyAccumulators(), *args, **kwargs)
@@ -902,7 +907,8 @@ def with_outputs(self, *tags, **main_kw):
"""
main_tag = main_kw.pop('main', None)
if main_kw:
-raise ValueError('Unexpected keyword arguments: %s' % main_kw.keys())
+raise ValueError('Unexpected keyword arguments: %s' %
+list(main_kw))
return _MultiParDo(self, tags, main_tag)

def _pardo_fn_data(self):
@@ -1666,7 +1672,6 @@ def expand(self, pcoll):


class Windowing(object):

def __init__(self, windowfn, triggerfn=None, accumulation_mode=None,
timestamp_combiner=None):
global AccumulationMode, DefaultTrigger # pylint: disable=global-variable-not-assigned
@@ -1712,6 +1717,10 @@ def __eq__(self, other):
and self.timestamp_combiner == other.timestamp_combiner)
return False

+def __hash__(self):
+return hash((self.windowfn, self.accumulation_mode,
+self.timestamp_combiner))
+
def is_default(self):
return self._is_default

@@ -1792,7 +1801,7 @@ def __init__(self, windowfn, **kwargs):
accumulation_mode = kwargs.pop('accumulation_mode', None)
timestamp_combiner = kwargs.pop('timestamp_combiner', None)
if kwargs:
-raise ValueError('Unexpected keyword arguments: %s' % kwargs.keys())
+raise ValueError('Unexpected keyword arguments: %s' % list(kwargs))
self.windowing = Windowing(
windowfn, triggerfn, accumulation_mode, timestamp_combiner)
super(WindowInto, self).__init__(self.WindowIntoFn(self.windowing))
@@ -1861,7 +1870,7 @@ def __init__(self, **kwargs):
super(Flatten, self).__init__()
self.pipeline = kwargs.pop('pipeline', None)
if kwargs:
-raise ValueError('Unexpected keyword arguments: %s' % kwargs.keys())
+raise ValueError('Unexpected keyword arguments: %s' % list(kwargs))

def _extract_input_pvalues(self, pvalueish):
try:
@@ -1906,7 +1915,7 @@ def __init__(self, value):
value: An object of values for the PCollection
"""
super(Create, self).__init__()
-if isinstance(value, string_types):
+if isinstance(value, (unicode, str, bytes)):
Contributor: No need to check for both str and bytes, since Python 2.7 defines bytes == str and on Python 3.x unicode == str.

Contributor Author: See #5729 (comment). Bytes in Python 3 also shouldn't be allowed, since we don't want to support creating a PCollection of single bytes.
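A quick illustration of the concern (my example, not from the PR):

```python
# On Python 3, iterating over bytes yields one int per byte, so treating
# a bytes value as an iterable would silently build a PCollection of
# integers instead of raising a clear error:
print(list(b'abc'))  # [97, 98, 99]
print(list('abc'))   # ['a', 'b', 'c'] -- plain strings are rejected too
```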

raise TypeError('PTransform Create: Refusing to treat string as '
'an iterable. (string=%r)' % value)
elif isinstance(value, dict):
@@ -1941,7 +1950,7 @@ def get_windowing(self, unused_inputs):

@staticmethod
def _create_source_from_iterable(values, coder):
-return Create._create_source(map(coder.encode, values), coder)
+return Create._create_source(list(map(coder.encode, values)), coder)

@staticmethod
def _create_source(serialized_values, coder):
13 changes: 10 additions & 3 deletions sdks/python/apache_beam/transforms/create_source.py
@@ -15,6 +15,13 @@
# limitations under the License.
#

+from __future__ import absolute_import
+from __future__ import division
+
+from builtins import map
+from builtins import next
+from builtins import range
+
from apache_beam.io import iobase
from apache_beam.transforms.core import Create

@@ -57,15 +64,15 @@ def split(self, desired_bundle_size, start_position=None,
start_position = 0
if stop_position is None:
stop_position = len(self._serialized_values)
-avg_size_per_value = self._total_size / len(self._serialized_values)
+avg_size_per_value = self._total_size // len(self._serialized_values)
num_values_per_split = max(
-int(desired_bundle_size / avg_size_per_value), 1)
+int(desired_bundle_size // avg_size_per_value), 1)
Contributor: No need for an int call?

Contributor: We still need to coerce it into an int; e.g. 4 // 7.0 has value 0.0 but type float.

Contributor: Good point. I didn't realise that avg_size_per_value could be a float.
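A small worked example of the point above (mine, not from the thread):

```python
# Floor division preserves operand types: if either operand is a float,
# the result is a float, so the explicit int() coercion is still needed.
print(4 // 7)         # 0    (int // int -> int)
print(4 // 7.0)       # 0.0  (int // float -> float)
print(int(4 // 7.0))  # 0    (safe to use as a count)
```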

start = start_position
while start < stop_position:
end = min(start + num_values_per_split, stop_position)
remaining = stop_position - end
# Avoid having a too small bundle at the end.
-if remaining < (num_values_per_split / 4):
+if remaining < (num_values_per_split // 4):
end = stop_position
sub_source = Create._create_source(
self._serialized_values[start:end], self._coder)
12 changes: 8 additions & 4 deletions sdks/python/apache_beam/transforms/create_test.py
@@ -16,8 +16,12 @@
#

"""Unit tests for the Create and _CreateSource classes."""
+from __future__ import absolute_import
+from __future__ import division
+
import logging
import unittest
+from builtins import range

from apache_beam import Create
from apache_beam.coders import FastPrimitivesCoder
@@ -33,13 +37,13 @@ def setUp(self):

def test_create_transform(self):
with TestPipeline() as p:
-assert_that(p | Create(range(10)), equal_to(range(10)))
+assert_that(p | Create(list(range(10))), equal_to(list(range(10))))

def test_create_source_read(self):
self.check_read([], self.coder)
self.check_read([1], self.coder)
# multiple values.
-self.check_read(range(10), self.coder)
+self.check_read(list(range(10)), self.coder)

def check_read(self, values, coder):
source = Create._create_source_from_iterable(values, coder)
@@ -49,7 +53,7 @@ def check_read(self, values, coder):
def test_create_source_read_with_initial_splits(self):
self.check_read_with_initial_splits([], self.coder, num_splits=2)
self.check_read_with_initial_splits([1], self.coder, num_splits=2)
-values = range(8)
+values = list(range(8))
# multiple values with a single split.
self.check_read_with_initial_splits(values, self.coder, num_splits=1)
# multiple values with a single split with a large desired bundle size
@@ -70,7 +74,7 @@ def check_read_with_initial_splits(self, values, coder, num_splits):
from the split sources.
"""
source = Create._create_source_from_iterable(values, coder)
-desired_bundle_size = source._total_size / num_splits
+desired_bundle_size = source._total_size // num_splits
splits = source.split(desired_bundle_size)
splits_info = [
(split.source, split.start_position, split.stop_position)
9 changes: 7 additions & 2 deletions sdks/python/apache_beam/transforms/cy_combiners.py
@@ -15,12 +15,17 @@
# limitations under the License.
#

+# cython: language_level=3

"""A library of basic cythonized CombineFn subclasses.

For internal use only; no backwards-compatibility guarantees.
"""

from __future__ import absolute_import
+from __future__ import division

+from builtins import object

from apache_beam.transforms import core

@@ -162,7 +167,7 @@ def extract_output(self):
self.sum %= 2**64
if self.sum >= INT64_MAX:
self.sum -= 2**64
-return self.sum / self.count if self.count else _NAN
+return self.sum // self.count if self.count else _NAN
Contributor: Please also make the change on line 266.
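For context (my sketch, not part of the review): this module now imports division from __future__, so plain / returns a float even for int operands; // preserves the previous Python 2 integer-mean behaviour.

```python
from __future__ import division

# With true division enabled, / always yields a float for ints;
# floor division keeps the legacy Python 2 result for this combiner.
print(7 / 2)   # 3.5 (true division on both Python 2 and 3 with the import)
print(7 // 2)  # 3   (matches what 7 / 2 returned on plain Python 2)
```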



class CountCombineFn(AccumulatorCombineFn):
@@ -258,7 +263,7 @@ def merge(self, accumulators):
self.count += accumulator.count

def extract_output(self):
-return self.sum / self.count if self.count else _NAN
+return self.sum // self.count if self.count else _NAN


class SumFloatFn(AccumulatorCombineFn):
@@ -14,6 +14,8 @@
otherwise, test on pure python module
"""

+from __future__ import absolute_import
+
import unittest

from mock import Mock
5 changes: 3 additions & 2 deletions sdks/python/apache_beam/transforms/display.py
@@ -41,10 +41,11 @@
import calendar
import inspect
import json
+from builtins import object
from datetime import datetime
from datetime import timedelta

-import six
+from past.builtins import unicode

__all__ = ['HasDisplayData', 'DisplayDataItem', 'DisplayData']

@@ -169,7 +170,7 @@ class DisplayDataItem(object):
display item belongs to.
"""
typeDict = {str:'STRING',
-six.text_type:'STRING',
+unicode:'STRING',
int:'INTEGER',
float:'FLOAT',
bool: 'BOOLEAN',
4 changes: 2 additions & 2 deletions sdks/python/apache_beam/transforms/display_test.py
@@ -24,8 +24,8 @@

# pylint: disable=ungrouped-imports
import hamcrest as hc
-import six
from hamcrest.core.base_matcher import BaseMatcher
+from past.builtins import unicode

import apache_beam as beam
from apache_beam.options.pipeline_options import PipelineOptions
@@ -165,7 +165,7 @@ def test_create_list_display_data(self):
def test_unicode_type_display_data(self):
class MyDoFn(beam.DoFn):
def display_data(self):
-return {'unicode_string': six.text_type('my string'),
+return {'unicode_string': unicode('my string'),
'unicode_literal_string': u'my literal string'}

fn = MyDoFn()
5 changes: 4 additions & 1 deletion sdks/python/apache_beam/transforms/ptransform.py
@@ -43,6 +43,9 @@ class and wrapper class that allows lambda functions to be used as
import os
import sys
import threading
+from builtins import hex
+from builtins import object
+from builtins import zip
from functools import reduce

from google.protobuf import message
@@ -622,7 +625,7 @@ def __init__(self, fn, *args, **kwargs):
super(PTransformWithSideInputs, self).__init__()

if (any([isinstance(v, pvalue.PCollection) for v in args]) or
-any([isinstance(v, pvalue.PCollection) for v in kwargs.itervalues()])):
+any([isinstance(v, pvalue.PCollection) for v in kwargs.values()])):
raise error.SideInputError(
'PCollection used directly as side input argument. Specify '
'AsIter(pcollection) or AsSingleton(pcollection) to indicate how the '
10 changes: 7 additions & 3 deletions sdks/python/apache_beam/transforms/ptransform_test.py
@@ -18,12 +18,16 @@
"""Unit tests for the PTransform and descendants."""

from __future__ import absolute_import
+from __future__ import division
from __future__ import print_function

import collections
import operator
import re
import unittest
+from builtins import map
+from builtins import range
+from builtins import zip
from functools import reduce

import hamcrest as hc
@@ -382,7 +386,7 @@ def test_combine_with_combine_fn(self):
pipeline = TestPipeline()
pcoll = pipeline | 'Start' >> beam.Create(vals)
result = pcoll | 'Mean' >> beam.CombineGlobally(self._MeanCombineFn())
-assert_that(result, equal_to([sum(vals) / len(vals)]))
+assert_that(result, equal_to([sum(vals) // len(vals)]))
pipeline.run()

def test_combine_with_callable(self):
@@ -413,8 +417,8 @@ def test_combine_per_key_with_combine_fn(self):
pcoll = pipeline | 'Start' >> beam.Create(([('a', x) for x in vals_1] +
[('b', x) for x in vals_2]))
result = pcoll | 'Mean' >> beam.CombinePerKey(self._MeanCombineFn())
-assert_that(result, equal_to([('a', sum(vals_1) / len(vals_1)),
-('b', sum(vals_2) / len(vals_2))]))
+assert_that(result, equal_to([('a', sum(vals_1) // len(vals_1)),
+('b', sum(vals_2) // len(vals_2))]))
pipeline.run()

def test_combine_per_key_with_callable(self):