diff --git a/python/pyarrow/filesystem.py b/python/pyarrow/filesystem.py index c2017e42b2409..c1e70a1ee699f 100644 --- a/python/pyarrow/filesystem.py +++ b/python/pyarrow/filesystem.py @@ -25,7 +25,7 @@ from os.path import join as pjoin import pyarrow as pa -from pyarrow.util import implements, _stringify_path, _is_path_like, _DEPR_MSG +from pyarrow.util import doc, _stringify_path, _is_path_like, _DEPR_MSG _FS_DEPR_MSG = _DEPR_MSG.format( @@ -260,12 +260,12 @@ def get_instance(cls): warnings.warn(_FS_DEPR_MSG, FutureWarning, stacklevel=2) return cls._get_instance() - @implements(FileSystem.ls) + @doc(FileSystem.ls) def ls(self, path): path = _stringify_path(path) return sorted(pjoin(path, x) for x in os.listdir(path)) - @implements(FileSystem.mkdir) + @doc(FileSystem.mkdir) def mkdir(self, path, create_parents=True): path = _stringify_path(path) if create_parents: @@ -273,26 +273,26 @@ def mkdir(self, path, create_parents=True): else: os.mkdir(path) - @implements(FileSystem.isdir) + @doc(FileSystem.isdir) def isdir(self, path): path = _stringify_path(path) return os.path.isdir(path) - @implements(FileSystem.isfile) + @doc(FileSystem.isfile) def isfile(self, path): path = _stringify_path(path) return os.path.isfile(path) - @implements(FileSystem._isfilestore) + @doc(FileSystem._isfilestore) def _isfilestore(self): return True - @implements(FileSystem.exists) + @doc(FileSystem.exists) def exists(self, path): path = _stringify_path(path) return os.path.exists(path) - @implements(FileSystem.open) + @doc(FileSystem.open) def open(self, path, mode='rb'): """ Open file for reading or writing. 
@@ -324,15 +324,15 @@ def __init__(self, fs): FutureWarning, stacklevel=2) self.fs = fs - @implements(FileSystem.isdir) + @doc(FileSystem.isdir) def isdir(self, path): raise NotImplementedError("Unsupported file system API") - @implements(FileSystem.isfile) + @doc(FileSystem.isfile) def isfile(self, path): raise NotImplementedError("Unsupported file system API") - @implements(FileSystem._isfilestore) + @doc(FileSystem._isfilestore) def _isfilestore(self): """ Object Stores like S3 and GCSFS are based on key lookups, not true @@ -340,17 +340,17 @@ def _isfilestore(self): """ return False - @implements(FileSystem.delete) + @doc(FileSystem.delete) def delete(self, path, recursive=False): path = _stringify_path(path) return self.fs.rm(path, recursive=recursive) - @implements(FileSystem.exists) + @doc(FileSystem.exists) def exists(self, path): path = _stringify_path(path) return self.fs.exists(path) - @implements(FileSystem.mkdir) + @doc(FileSystem.mkdir) def mkdir(self, path, create_parents=True): path = _stringify_path(path) if create_parents: @@ -358,7 +358,7 @@ def mkdir(self, path, create_parents=True): else: return self.fs.mkdir(path) - @implements(FileSystem.open) + @doc(FileSystem.open) def open(self, path, mode='rb'): """ Open file for reading or writing. 
@@ -380,7 +380,7 @@ def walk(self, path): class S3FSWrapper(DaskFileSystem): - @implements(FileSystem.isdir) + @doc(FileSystem.isdir) def isdir(self, path): path = _sanitize_s3(_stringify_path(path)) try: @@ -392,7 +392,7 @@ def isdir(self, path): except OSError: return False - @implements(FileSystem.isfile) + @doc(FileSystem.isfile) def isfile(self, path): path = _sanitize_s3(_stringify_path(path)) try: diff --git a/python/pyarrow/hdfs.py b/python/pyarrow/hdfs.py index 56667bd5df57d..2e6c387a8fde3 100644 --- a/python/pyarrow/hdfs.py +++ b/python/pyarrow/hdfs.py @@ -21,7 +21,7 @@ import sys import warnings -from pyarrow.util import implements, _DEPR_MSG +from pyarrow.util import doc, _DEPR_MSG from pyarrow.filesystem import FileSystem import pyarrow._hdfsio as _hdfsio @@ -58,15 +58,15 @@ def _isfilestore(self): """ return True - @implements(FileSystem.isdir) + @doc(FileSystem.isdir) def isdir(self, path): return super().isdir(path) - @implements(FileSystem.isfile) + @doc(FileSystem.isfile) def isfile(self, path): return super().isfile(path) - @implements(FileSystem.delete) + @doc(FileSystem.delete) def delete(self, path, recursive=False): return super().delete(path, recursive) @@ -85,11 +85,11 @@ def mkdir(self, path, **kwargs): """ return super().mkdir(path) - @implements(FileSystem.rename) + @doc(FileSystem.rename) def rename(self, path, new_path): return super().rename(path, new_path) - @implements(FileSystem.exists) + @doc(FileSystem.exists) def exists(self, path): return super().exists(path) diff --git a/python/pyarrow/tests/test_util.py b/python/pyarrow/tests/test_util.py index 2b351a53442d5..9fccb76112d9c 100644 --- a/python/pyarrow/tests/test_util.py +++ b/python/pyarrow/tests/test_util.py @@ -18,14 +18,171 @@ import gc import signal import sys +import textwrap import weakref import pytest -from pyarrow import util +from pyarrow.util import doc, _break_traceback_cycle_from_frame from pyarrow.tests.util import disabled_gc +@doc(method="func_a", 
operation="A") +def func_a(whatever): + """ + This is the {method} method. + + It computes {operation}. + """ + pass + + +@doc( + func_a, + textwrap.dedent( + """ + Examples + -------- + + >>> func_b() + B + """ + ), + method="func_b", + operation="B", +) +def func_b(whatever): + pass + + +@doc( + func_a, + method="func_c", + operation="C", +) +def func_c(whatever): + """ + Examples + -------- + + >>> func_c() + C + """ + pass + + +@doc(func_a, method="func_d", operation="D") +def func_d(whatever): + pass + + +@doc(func_d, method="func_e", operation="E") +def func_e(whatever): + pass + + +@doc(method="func_f") +def func_f(whatever): + """ + This is the {method} method. + + {{ We can escape curly braces like this. }} + + Examples + -------- + We should replace curly brace usage in doctests. + + >>> dict(x = "x", y = "y") + >>> set((1, 2, 3)) + """ + pass + + +def test_docstring_formatting(): + docstr = textwrap.dedent( + """ + This is the func_a method. + + It computes A. + """ + ) + assert func_a.__doc__ == docstr + + +def test_docstring_concatenation(): + docstr = textwrap.dedent( + """ + This is the func_b method. + + It computes B. + + Examples + -------- + + >>> func_b() + B + """ + ) + assert func_b.__doc__ == docstr + + +def test_docstring_append(): + docstr = textwrap.dedent( + """ + This is the func_c method. + + It computes C. + + Examples + -------- + + >>> func_c() + C + """ + ) + assert func_c.__doc__ == docstr + + +def test_docstring_template_from_callable(): + docstr = textwrap.dedent( + """ + This is the func_d method. + + It computes D. + """ + ) + assert func_d.__doc__ == docstr + + +def test_inherit_docstring_template_from_callable(): + docstr = textwrap.dedent( + """ + This is the func_e method. + + It computes E. + """ + ) + assert func_e.__doc__ == docstr + + +def test_escaping_in_docstring(): + docstr = textwrap.dedent( + """ + This is the func_f method. + + { We can escape curly braces like this. 
} + + Examples + -------- + We should replace curly brace usage in doctests. + + >>> dict(x = "x", y = "y") + >>> set((1, 2, 3)) + """ + ) + assert func_f.__doc__ == docstr + + def exhibit_signal_refcycle(): # Put an object in the frame locals and return a weakref to it. # If `signal.getsignal` has a bug where it creates a reference cycle @@ -48,5 +205,5 @@ def test_signal_refcycle(): with disabled_gc(): wr = exhibit_signal_refcycle() assert wr() is not None - util._break_traceback_cycle_from_frame(sys._getframe(0)) + _break_traceback_cycle_from_frame(sys._getframe(0)) assert wr() is None diff --git a/python/pyarrow/types.py b/python/pyarrow/types.py index a88ec2ad7e3f5..5d7dbe4b451b9 100644 --- a/python/pyarrow/types.py +++ b/python/pyarrow/types.py @@ -23,6 +23,7 @@ is_float_value) import pyarrow.lib as lib +from pyarrow.util import doc _SIGNED_INTEGER_TYPES = {lib.Type_INT8, lib.Type_INT16, lib.Type_INT32, @@ -43,9 +44,10 @@ lib.Type_MAP} | _UNION_TYPES +@doc(datatype="null") def is_null(t): """ - Return True if value is an instance of a null type. + Return True if value is an instance of type: {datatype}. Parameters ---------- @@ -54,351 +56,165 @@ def is_null(t): return t.id == lib.Type_NA +@doc(is_null, datatype="boolean") def is_boolean(t): - """ - Return True if value is an instance of a boolean type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_BOOL +@doc(is_null, datatype="any integer") def is_integer(t): - """ - Return True if value is an instance of any integer type. - - Parameters - ---------- - t : DataType - """ return t.id in _INTEGER_TYPES +@doc(is_null, datatype="signed integer") def is_signed_integer(t): - """ - Return True if value is an instance of any signed integer type. - - Parameters - ---------- - t : DataType - """ return t.id in _SIGNED_INTEGER_TYPES +@doc(is_null, datatype="unsigned integer") def is_unsigned_integer(t): - """ - Return True if value is an instance of any unsigned integer type. 
- - Parameters - ---------- - t : DataType - """ return t.id in _UNSIGNED_INTEGER_TYPES +@doc(is_null, datatype="int8") def is_int8(t): - """ - Return True if value is an instance of an int8 type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_INT8 +@doc(is_null, datatype="int16") def is_int16(t): - """ - Return True if value is an instance of an int16 type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_INT16 +@doc(is_null, datatype="int32") def is_int32(t): - """ - Return True if value is an instance of an int32 type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_INT32 +@doc(is_null, datatype="int64") def is_int64(t): - """ - Return True if value is an instance of an int64 type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_INT64 +@doc(is_null, datatype="uint8") def is_uint8(t): - """ - Return True if value is an instance of an uint8 type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_UINT8 +@doc(is_null, datatype="uint16") def is_uint16(t): - """ - Return True if value is an instance of an uint16 type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_UINT16 +@doc(is_null, datatype="uint32") def is_uint32(t): - """ - Return True if value is an instance of an uint32 type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_UINT32 +@doc(is_null, datatype="uint64") def is_uint64(t): - """ - Return True if value is an instance of an uint64 type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_UINT64 +@doc(is_null, datatype="floating point numeric") def is_floating(t): - """ - Return True if value is an instance of a floating point numeric type. 
- - Parameters - ---------- - t : DataType - """ return t.id in _FLOATING_TYPES +@doc(is_null, datatype="float16 (half-precision)") def is_float16(t): - """ - Return True if value is an instance of a float16 (half-precision) type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_HALF_FLOAT +@doc(is_null, datatype="float32 (single precision)") def is_float32(t): - """ - Return True if value is an instance of a float32 (single precision) type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_FLOAT +@doc(is_null, datatype="float64 (double precision)") def is_float64(t): - """ - Return True if value is an instance of a float64 (double precision) type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_DOUBLE +@doc(is_null, datatype="list") def is_list(t): - """ - Return True if value is an instance of a list type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_LIST +@doc(is_null, datatype="large list") def is_large_list(t): - """ - Return True if value is an instance of a large list type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_LARGE_LIST +@doc(is_null, datatype="fixed size list") def is_fixed_size_list(t): - """ - Return True if value is an instance of a fixed size list type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_FIXED_SIZE_LIST +@doc(is_null, datatype="struct") def is_struct(t): - """ - Return True if value is an instance of a struct type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_STRUCT +@doc(is_null, datatype="union") def is_union(t): - """ - Return True if value is an instance of a union type. - - Parameters - ---------- - t : DataType - """ return t.id in _UNION_TYPES +@doc(is_null, datatype="nested type") def is_nested(t): - """ - Return True if value is an instance of a nested type. 
- - Parameters - ---------- - t : DataType - """ return t.id in _NESTED_TYPES +@doc(is_null, datatype="run-end encoded") def is_run_end_encoded(t): - """ - Return True if value is an instance of a run-end encoded type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_RUN_END_ENCODED +@doc(is_null, datatype="date, time, timestamp or duration") def is_temporal(t): - """ - Return True if value is an instance of date, time, timestamp or duration. - - Parameters - ---------- - t : DataType - """ return t.id in _TEMPORAL_TYPES +@doc(is_null, datatype="timestamp") def is_timestamp(t): - """ - Return True if value is an instance of a timestamp type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_TIMESTAMP +@doc(is_null, datatype="duration") def is_duration(t): - """ - Return True if value is an instance of a duration type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_DURATION +@doc(is_null, datatype="time") def is_time(t): - """ - Return True if value is an instance of a time type. - - Parameters - ---------- - t : DataType - """ return t.id in _TIME_TYPES +@doc(is_null, datatype="time32") def is_time32(t): - """ - Return True if value is an instance of a time32 type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_TIME32 +@doc(is_null, datatype="time64") def is_time64(t): - """ - Return True if value is an instance of a time64 type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_TIME64 +@doc(is_null, datatype="variable-length binary") def is_binary(t): - """ - Return True if value is an instance of a variable-length binary type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_BINARY +@doc(is_null, datatype="large variable-length binary") def is_large_binary(t): - """ - Return True if value is an instance of a large variable-length - binary type. 
- - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_LARGE_BINARY +@doc(method="is_string") def is_unicode(t): """ - Alias for is_string. + Alias for {method}. Parameters ---------- @@ -407,155 +223,71 @@ def is_unicode(t): return is_string(t) +@doc(is_null, datatype="string (utf8 unicode)") def is_string(t): - """ - Return True if value is an instance of string (utf8 unicode) type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_STRING +@doc(is_unicode, method="is_large_string") def is_large_unicode(t): - """ - Alias for is_large_string. - - Parameters - ---------- - t : DataType - """ return is_large_string(t) +@doc(is_null, datatype="large string (utf8 unicode)") def is_large_string(t): - """ - Return True if value is an instance of large string (utf8 unicode) type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_LARGE_STRING +@doc(is_null, datatype="fixed size binary") def is_fixed_size_binary(t): - """ - Return True if value is an instance of a fixed size binary type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_FIXED_SIZE_BINARY +@doc(is_null, datatype="date") def is_date(t): - """ - Return True if value is an instance of a date type. - - Parameters - ---------- - t : DataType - """ return t.id in _DATE_TYPES +@doc(is_null, datatype="date32 (days)") def is_date32(t): - """ - Return True if value is an instance of a date32 (days) type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_DATE32 +@doc(is_null, datatype="date64 (milliseconds)") def is_date64(t): - """ - Return True if value is an instance of a date64 (milliseconds) type. - - Parameters - ---------- - t : DataType - """ return t.id == lib.Type_DATE64 +@doc(is_null, datatype="map") def is_map(t): - """ - Return True if value is an instance of a map logical type. 
def doc(*docstrings, **params):
    """
    Build a decorator that assembles a docstring from reusable templates.

    The given templates are concatenated in order, followed by the decorated
    callable's own (dedented) docstring, and the result is assigned to the
    callable's ``__doc__``. When keyword parameters are supplied, every
    string template is passed through ``str.format`` with those parameters;
    docstrings taken from plain callables are inserted as-is.

    The unformatted components are stored on the decorated callable as
    ``_docstring_components`` so that another callable can later reuse the
    same templates with different parameters. String templates are stored as
    strings; a callable without ``_docstring_components`` is stored as the
    callable itself and its ``__doc__`` is looked up when the docstring is
    assembled.

    This decorator cannot be used on Cython classes due to a CPython
    constraint, which enforces the ``__doc__`` attribute to be read-only.
    See https://github.com/python/cpython/issues/91309

    Parameters
    ----------
    *docstrings : None, str, or callable
        The strings / docstrings / docstring templates placed, in order,
        before the decorated callable's own docstring. ``None`` entries are
        ignored, as are callables that have no docstring.
    **params
        The key/value pairs used to format the string templates. If no
        parameters are given, no formatting (and therefore no ``{{``/``}}``
        unescaping) takes place.

    Returns
    -------
    callable
        A decorator that sets ``__doc__`` and ``_docstring_components`` on
        the callable it receives and returns that same callable.
    """

    def decorator(decorated):
        templates = []

        # Gather the raw building blocks, keeping them unformatted so they
        # can be re-used by other callables later on.
        for source in docstrings:
            if source is None:
                continue
            if hasattr(source, "_docstring_components"):
                # Already decorated with ``doc``: inherit its templates.
                templates += source._docstring_components
            elif isinstance(source, str) or source.__doc__:
                templates.append(source)

        # The decorated callable's own docstring always comes last.
        own_docstring = decorated.__doc__
        if own_docstring:
            templates.append(textwrap.dedent(own_docstring))

        # Stage 1: substitute parameters into string templates only.
        if params:
            formatted = []
            for template in templates:
                if isinstance(template, str):
                    template = template.format(**params)
                formatted.append(template)
        else:
            formatted = list(templates)

        # Stage 2: resolve callables to their (dedented) docstrings.
        pieces = []
        for item in formatted:
            if not isinstance(item, str):
                item = textwrap.dedent(item.__doc__ or "")
            pieces.append(item)

        decorated.__doc__ = "".join(pieces)
        decorated._docstring_components = templates
        return decorated

    return decorator