Skip to content

Commit

Permalink
apacheGH-34868: [Python] Share docstrings between classes (apache#34894)
Browse files Browse the repository at this point in the history
### Rationale for this change

Python classes sometimes duplicate docstrings, but change one word such as class name. Add a decorator function as a utility to help deduplicate docstring descriptions. Only works in Python. Does not work in Cython due to this CPython issue python/cpython#91309.

### What changes are included in this PR?

Add a decorator `@ doc` that can copy, concatenate, and/or format docstrings between classes.

### Are these changes tested?

Tests added. 

```
>>> import pyarrow
>>> from pyarrow.filesystem import FileSystem, LocalFileSystem, DaskFileSystem, S3FSWrapper
>>> from pyarrow.hdfs import HadoopFileSystem
>>> for fs in [FileSystem, LocalFileSystem, DaskFileSystem, S3FSWrapper, HadoopFileSystem]:
...     print(fs.__name__)
...     print(fs.isdir.__doc__)
... 
FileSystem

        Return True if path is a directory.

        Parameters
        ----------
        path : str
            Path to check.
        
LocalFileSystem

Return True if path is a directory.

Parameters
----------
path : str
    Path to check.

DaskFileSystem

Return True if path is a directory.

Parameters
----------
path : str
    Path to check.

S3FSWrapper

Return True if path is a directory.

Parameters
----------
path : str
    Path to check.

HadoopFileSystem

Return True if path is a directory.

Parameters
----------
path : str
    Path to check.

```
Note that `FileSystem.isdir.__doc__` is not dedented because it does not use the `@ doc` decorator.

### Are there any user-facing changes?

No
* Closes: apache#34868

Authored-by: Dane Pitkin <dane@voltrondata.com>
Signed-off-by: Alenka Frim <frim.alenka@gmail.com>
  • Loading branch information
danepitkin committed Apr 24, 2023
1 parent f4bd43d commit cd14e20
Show file tree
Hide file tree
Showing 5 changed files with 294 additions and 347 deletions.
34 changes: 17 additions & 17 deletions python/pyarrow/filesystem.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
from os.path import join as pjoin

import pyarrow as pa
from pyarrow.util import implements, _stringify_path, _is_path_like, _DEPR_MSG
from pyarrow.util import doc, _stringify_path, _is_path_like, _DEPR_MSG


_FS_DEPR_MSG = _DEPR_MSG.format(
Expand Down Expand Up @@ -260,39 +260,39 @@ def get_instance(cls):
warnings.warn(_FS_DEPR_MSG, FutureWarning, stacklevel=2)
return cls._get_instance()

@implements(FileSystem.ls)
@doc(FileSystem.ls)
def ls(self, path):
path = _stringify_path(path)
return sorted(pjoin(path, x) for x in os.listdir(path))

@implements(FileSystem.mkdir)
@doc(FileSystem.mkdir)
def mkdir(self, path, create_parents=True):
path = _stringify_path(path)
if create_parents:
os.makedirs(path)
else:
os.mkdir(path)

@implements(FileSystem.isdir)
@doc(FileSystem.isdir)
def isdir(self, path):
path = _stringify_path(path)
return os.path.isdir(path)

@implements(FileSystem.isfile)
@doc(FileSystem.isfile)
def isfile(self, path):
path = _stringify_path(path)
return os.path.isfile(path)

@implements(FileSystem._isfilestore)
@doc(FileSystem._isfilestore)
def _isfilestore(self):
return True

@implements(FileSystem.exists)
@doc(FileSystem.exists)
def exists(self, path):
path = _stringify_path(path)
return os.path.exists(path)

@implements(FileSystem.open)
@doc(FileSystem.open)
def open(self, path, mode='rb'):
"""
Open file for reading or writing.
Expand Down Expand Up @@ -324,41 +324,41 @@ def __init__(self, fs):
FutureWarning, stacklevel=2)
self.fs = fs

@implements(FileSystem.isdir)
@doc(FileSystem.isdir)
def isdir(self, path):
raise NotImplementedError("Unsupported file system API")

@implements(FileSystem.isfile)
@doc(FileSystem.isfile)
def isfile(self, path):
raise NotImplementedError("Unsupported file system API")

@implements(FileSystem._isfilestore)
@doc(FileSystem._isfilestore)
def _isfilestore(self):
"""
Object Stores like S3 and GCSFS are based on key lookups, not true
file-paths.
"""
return False

@implements(FileSystem.delete)
@doc(FileSystem.delete)
def delete(self, path, recursive=False):
path = _stringify_path(path)
return self.fs.rm(path, recursive=recursive)

@implements(FileSystem.exists)
@doc(FileSystem.exists)
def exists(self, path):
path = _stringify_path(path)
return self.fs.exists(path)

@implements(FileSystem.mkdir)
@doc(FileSystem.mkdir)
def mkdir(self, path, create_parents=True):
path = _stringify_path(path)
if create_parents:
return self.fs.mkdirs(path)
else:
return self.fs.mkdir(path)

@implements(FileSystem.open)
@doc(FileSystem.open)
def open(self, path, mode='rb'):
"""
Open file for reading or writing.
Expand All @@ -380,7 +380,7 @@ def walk(self, path):

class S3FSWrapper(DaskFileSystem):

@implements(FileSystem.isdir)
@doc(FileSystem.isdir)
def isdir(self, path):
path = _sanitize_s3(_stringify_path(path))
try:
Expand All @@ -392,7 +392,7 @@ def isdir(self, path):
except OSError:
return False

@implements(FileSystem.isfile)
@doc(FileSystem.isfile)
def isfile(self, path):
path = _sanitize_s3(_stringify_path(path))
try:
Expand Down
12 changes: 6 additions & 6 deletions python/pyarrow/hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import sys
import warnings

from pyarrow.util import implements, _DEPR_MSG
from pyarrow.util import doc, _DEPR_MSG
from pyarrow.filesystem import FileSystem
import pyarrow._hdfsio as _hdfsio

Expand Down Expand Up @@ -58,15 +58,15 @@ def _isfilestore(self):
"""
return True

@implements(FileSystem.isdir)
@doc(FileSystem.isdir)
def isdir(self, path):
return super().isdir(path)

@implements(FileSystem.isfile)
@doc(FileSystem.isfile)
def isfile(self, path):
return super().isfile(path)

@implements(FileSystem.delete)
@doc(FileSystem.delete)
def delete(self, path, recursive=False):
return super().delete(path, recursive)

Expand All @@ -85,11 +85,11 @@ def mkdir(self, path, **kwargs):
"""
return super().mkdir(path)

@implements(FileSystem.rename)
@doc(FileSystem.rename)
def rename(self, path, new_path):
return super().rename(path, new_path)

@implements(FileSystem.exists)
@doc(FileSystem.exists)
def exists(self, path):
return super().exists(path)

Expand Down
161 changes: 159 additions & 2 deletions python/pyarrow/tests/test_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,171 @@
import gc
import signal
import sys
import textwrap
import weakref

import pytest

from pyarrow import util
from pyarrow.util import doc, _break_traceback_cycle_from_frame
from pyarrow.tests.util import disabled_gc


@doc(method="func_a", operation="A")
def func_a(whatever):
"""
This is the {method} method.
It computes {operation}.
"""
pass


@doc(
func_a,
textwrap.dedent(
"""
Examples
--------
>>> func_b()
B
"""
),
method="func_b",
operation="B",
)
def func_b(whatever):
pass


@doc(
func_a,
method="func_c",
operation="C",
)
def func_c(whatever):
"""
Examples
--------
>>> func_c()
C
"""
pass


@doc(func_a, method="func_d", operation="D")
def func_d(whatever):
pass


@doc(func_d, method="func_e", operation="E")
def func_e(whatever):
pass


@doc(method="func_f")
def func_f(whatever):
"""
This is the {method} method.
{{ We can escape curly braces like this. }}
Examples
--------
We should replace curly brace usage in doctests.
>>> dict(x = "x", y = "y")
>>> set((1, 2, 3))
"""
pass


def test_docstring_formatting():
docstr = textwrap.dedent(
"""
This is the func_a method.
It computes A.
"""
)
assert func_a.__doc__ == docstr


def test_docstring_concatenation():
docstr = textwrap.dedent(
"""
This is the func_b method.
It computes B.
Examples
--------
>>> func_b()
B
"""
)
assert func_b.__doc__ == docstr


def test_docstring_append():
docstr = textwrap.dedent(
"""
This is the func_c method.
It computes C.
Examples
--------
>>> func_c()
C
"""
)
assert func_c.__doc__ == docstr


def test_docstring_template_from_callable():
docstr = textwrap.dedent(
"""
This is the func_d method.
It computes D.
"""
)
assert func_d.__doc__ == docstr


def test_inherit_docstring_template_from_callable():
docstr = textwrap.dedent(
"""
This is the func_e method.
It computes E.
"""
)
assert func_e.__doc__ == docstr


def test_escaping_in_docstring():
docstr = textwrap.dedent(
"""
This is the func_f method.
{ We can escape curly braces like this. }
Examples
--------
We should replace curly brace usage in doctests.
>>> dict(x = "x", y = "y")
>>> set((1, 2, 3))
"""
)
assert func_f.__doc__ == docstr


def exhibit_signal_refcycle():
# Put an object in the frame locals and return a weakref to it.
# If `signal.getsignal` has a bug where it creates a reference cycle
Expand All @@ -48,5 +205,5 @@ def test_signal_refcycle():
with disabled_gc():
wr = exhibit_signal_refcycle()
assert wr() is not None
util._break_traceback_cycle_from_frame(sys._getframe(0))
_break_traceback_cycle_from_frame(sys._getframe(0))
assert wr() is None
Loading

0 comments on commit cd14e20

Please sign in to comment.