From 551919a359e3ef16db15e2e10f98ef258f655a33 Mon Sep 17 00:00:00 2001 From: Tibor Kiss Date: Sat, 11 Mar 2017 07:13:04 +0100 Subject: [PATCH 1/2] [BEAM-1694] Fix docstring inaccuracies in Python-SDK --- sdks/python/apache_beam/coders/typecoders.py | 2 +- sdks/python/apache_beam/io/avroio.py | 10 ++++++---- sdks/python/apache_beam/io/gcp/gcsio.py | 4 +++- sdks/python/apache_beam/pipeline.py | 1 + sdks/python/apache_beam/transforms/core.py | 2 +- sdks/python/apache_beam/typehints/trivial_inference.py | 4 +++- sdks/python/apache_beam/utils/counters.py | 5 ++--- sdks/python/apache_beam/utils/retry.py | 4 ++-- 8 files changed, 19 insertions(+), 13 deletions(-) diff --git a/sdks/python/apache_beam/coders/typecoders.py b/sdks/python/apache_beam/coders/typecoders.py index 767d79195f54..1bd4312705f2 100644 --- a/sdks/python/apache_beam/coders/typecoders.py +++ b/sdks/python/apache_beam/coders/typecoders.py @@ -160,7 +160,7 @@ def verify_deterministic(self, key_coder, op_name, silent=True): class FirstOf(object): - "A class used to get the first matching coder from a list of coders." + """A class used to get the first matching coder from a list of coders.""" def __init__(self, coders): self._coders = coders diff --git a/sdks/python/apache_beam/io/avroio.py b/sdks/python/apache_beam/io/avroio.py index ab985307ecd7..b7e10f3269cb 100644 --- a/sdks/python/apache_beam/io/avroio.py +++ b/sdks/python/apache_beam/io/avroio.py @@ -67,13 +67,11 @@ def __init__(self, file_pattern=None, min_bundle_size=0, validate=True): {u'name': u'Alyssa', u'favorite_number': 256, u'favorite_color': None}). Args: - label: label of the PTransform. file_pattern: the set of files to be read. min_bundle_size: the minimum size in bytes, to be considered when splitting the input into bundles. validate: flag to verify that the files exist during the pipeline creation time. - **kwargs: Additional keyword arguments to be passed to the base class. 
""" super(ReadFromAvro, self).__init__() self._source = _AvroSource(file_pattern, min_bundle_size, validate=validate) @@ -129,6 +127,12 @@ def read_block_from_file(f, codec, schema, expected_sync_marker): Args: f: Avro file to read. + codec: Decompression codec to use. + Supported codecs: 'null', 'deflate', 'snappy' + schema: Avro Schema definition represented as JSON string. + expected_sync_marker: Avro synchronization marker. + If the block's sync marker does not match with this parameter then + ValueError is thrown. Returns: A single _AvroBlock. @@ -302,8 +306,6 @@ def __init__(self, codec: The codec to use for block-level compression. Any string supported by the Avro specification is accepted (for example 'null'). file_name_suffix: Suffix for the files written. - append_trailing_newlines: indicate whether this sink should write an - additional newline char after writing each element. num_shards: The number of files (shards) used for output. If not set, the service will decide on the optimal number of shards. Constraining the number of shards is likely to reduce diff --git a/sdks/python/apache_beam/io/gcp/gcsio.py b/sdks/python/apache_beam/io/gcp/gcsio.py index cf00bb2cdd5c..020c38f4f535 100644 --- a/sdks/python/apache_beam/io/gcp/gcsio.py +++ b/sdks/python/apache_beam/io/gcp/gcsio.py @@ -155,6 +155,8 @@ def glob(self, pattern, limit=None): Args: pattern: GCS file path pattern in the form gs://<bucket>/<name>. + limit: Maximal number of path names to return. + All matching paths are returned if set to None. Returns: list of GCS file paths matching the given pattern. @@ -370,7 +372,7 @@ def size_of_files_in_glob(self, pattern): """Returns the size of all the files in the glob as a dictionary Args: - path: a file path pattern that reads the size of all the files + pattern: a file path pattern that reads the size of all the files """ bucket, name_pattern = parse_gcs_path(pattern) # Get the prefix with which we can list objects in the given bucket. 
diff --git a/sdks/python/apache_beam/pipeline.py b/sdks/python/apache_beam/pipeline.py index 7db39a9aa781..dc05bd3ed546 100644 --- a/sdks/python/apache_beam/pipeline.py +++ b/sdks/python/apache_beam/pipeline.py @@ -191,6 +191,7 @@ def apply(self, transform, pvalueish=None, label=None): Args: transform: the PTranform to apply. pvalueish: the input for the PTransform (typically a PCollection). + label: label of the PTransform. Raises: TypeError: if the transform object extracted from the argument list is diff --git a/sdks/python/apache_beam/transforms/core.py b/sdks/python/apache_beam/transforms/core.py index 32516711a356..7a52828bee80 100644 --- a/sdks/python/apache_beam/transforms/core.py +++ b/sdks/python/apache_beam/transforms/core.py @@ -521,7 +521,7 @@ class PartitionFn(WithTypeHints): def default_label(self): return self.__class__.__name__ - def partition_for(self, context, num_partitions, *args, **kwargs): + def partition_for(self, element, num_partitions, *args, **kwargs): """Specify which partition will receive this element. Args: diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py index e1fbc4233e48..4e4a1814c7ee 100644 --- a/sdks/python/apache_beam/typehints/trivial_inference.py +++ b/sdks/python/apache_beam/typehints/trivial_inference.py @@ -218,9 +218,10 @@ def infer_return_type(c, input_types, debug=False, depth=5): """Analyses a callable to deduce its return type. Args: - f: A Python function object to infer the return type of. + c: A Python callable to infer the return type of. input_types: A sequence of inputs corresponding to the input types. debug: Whether to print verbose debugging information. 
+ depth: Maximal inspection depth of function calls during type inference Returns: A TypeConstraint that that the return value of this function will (likely) @@ -268,6 +269,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): f: A Python function object to infer the return type of. input_types: A sequence of inputs corresponding to the input types. debug: Whether to print verbose debugging information. + depth: Maximal inspection depth of function calls during type inference Returns: A TypeConstraint that that the return value of this function will (likely) diff --git a/sdks/python/apache_beam/utils/counters.py b/sdks/python/apache_beam/utils/counters.py index f6b432a97efd..e41d732a02f6 100644 --- a/sdks/python/apache_beam/utils/counters.py +++ b/sdks/python/apache_beam/utils/counters.py @@ -37,9 +37,8 @@ class Counter(object): Attributes: name: the name of the counter, a string - aggregation_kind: one of the aggregation kinds defined by this class. - total: the total size of all the items passed to update() - elements: the number of times update() was called + combine_fn: the CombineFn to use for aggregation + accumulator: the accumulator created for the combine_fn """ # Handy references to common counters. diff --git a/sdks/python/apache_beam/utils/retry.py b/sdks/python/apache_beam/utils/retry.py index 05973c558905..1c5f99aa2d99 100644 --- a/sdks/python/apache_beam/utils/retry.py +++ b/sdks/python/apache_beam/utils/retry.py @@ -59,7 +59,7 @@ class FuzzedExponentialIntervals(object): fuzz: A value between 0 and 1, indicating the fraction of fuzz. For a given delay d, the fuzzed delay is randomly chosen between [(1 - fuzz) * d, d]. - max_delay_sec: Maximum delay (in seconds). After this limit is reached, + max_delay_secs: Maximum delay (in seconds). After this limit is reached, further tries use max_delay_sec instead of exponentially increasing the time. Defaults to 1 hour. 
""" @@ -143,7 +143,7 @@ def with_exponential_backoff( can be used so that the delays are not randomized. factor: The exponential factor to use on subsequent retries. Default is 2 (doubling). - max_delay_sec: Maximum delay (in seconds). After this limit is reached, + max_delay_secs: Maximum delay (in seconds). After this limit is reached, further tries use max_delay_sec instead of exponentially increasing the time. Defaults to 4 hours. From 1f2ac2c7bd4d4e50d5f19aa75f6a3c14bb3b63b2 Mon Sep 17 00:00:00 2001 From: Tibor Kiss Date: Tue, 14 Mar 2017 19:30:21 +0100 Subject: [PATCH 2/2] Addressing reviewer's comments --- sdks/python/apache_beam/io/avroio.py | 7 +++---- sdks/python/apache_beam/typehints/trivial_inference.py | 4 ++-- sdks/python/apache_beam/utils/retry.py | 2 +- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/sdks/python/apache_beam/io/avroio.py b/sdks/python/apache_beam/io/avroio.py index b7e10f3269cb..6fdd79804545 100644 --- a/sdks/python/apache_beam/io/avroio.py +++ b/sdks/python/apache_beam/io/avroio.py @@ -127,12 +127,11 @@ def read_block_from_file(f, codec, schema, expected_sync_marker): Args: f: Avro file to read. - codec: Decompression codec to use. + codec: The codec to use for block-level decompression. Supported codecs: 'null', 'deflate', 'snappy' schema: Avro Schema definition represented as JSON string. - expected_sync_marker: Avro synchronization marker. - If the block's sync marker does not match with this parameter then - ValueError is thrown. + expected_sync_marker: Avro synchronization marker. If the block's sync + marker does not match with this parameter then ValueError is thrown. Returns: A single _AvroBlock. 
diff --git a/sdks/python/apache_beam/typehints/trivial_inference.py b/sdks/python/apache_beam/typehints/trivial_inference.py index 4e4a1814c7ee..b61110339bf9 100644 --- a/sdks/python/apache_beam/typehints/trivial_inference.py +++ b/sdks/python/apache_beam/typehints/trivial_inference.py @@ -221,7 +221,7 @@ def infer_return_type(c, input_types, debug=False, depth=5): c: A Python callable to infer the return type of. input_types: A sequence of inputs corresponding to the input types. debug: Whether to print verbose debugging information. - depth: Maximal inspection depth of function calls during type inference + depth: Maximum inspection depth during type inference. Returns: A TypeConstraint that that the return value of this function will (likely) @@ -269,7 +269,7 @@ def infer_return_type_func(f, input_types, debug=False, depth=0): f: A Python function object to infer the return type of. input_types: A sequence of inputs corresponding to the input types. debug: Whether to print verbose debugging information. - depth: Maximal inspection depth of function calls during type inference + depth: Maximum inspection depth during type inference. Returns: A TypeConstraint that that the return value of this function will (likely) diff --git a/sdks/python/apache_beam/utils/retry.py b/sdks/python/apache_beam/utils/retry.py index 1c5f99aa2d99..8f7152ab675a 100644 --- a/sdks/python/apache_beam/utils/retry.py +++ b/sdks/python/apache_beam/utils/retry.py @@ -145,7 +145,7 @@ def with_exponential_backoff( Default is 2 (doubling). max_delay_secs: Maximum delay (in seconds). After this limit is reached, further tries use max_delay_sec instead of exponentially increasing - the time. Defaults to 4 hours. + the time. Defaults to 1 hour. Returns: As per Python decorators with arguments pattern returns a decorator