From 53963b5f1220278d43cc9c474babcab3c49c87bf Mon Sep 17 00:00:00 2001 From: Petr Viktorin Date: Fri, 5 Apr 2024 13:55:59 +0200 Subject: [PATCH] gh-116608: importlib.resources: Un-deprecate functional API & add subdirectory support (GH-116609) --- Doc/library/importlib.resources.rst | 178 ++++++++++++++ Doc/whatsnew/3.13.rst | 39 ++-- Lib/importlib/resources/__init__.py | 17 ++ Lib/importlib/resources/_functional.py | 85 +++++++ .../resources/test_functional.py | 219 ++++++++++++++++++ ...-03-11-17-04-55.gh-issue-116608.30f58-.rst | 10 + 6 files changed, 533 insertions(+), 15 deletions(-) create mode 100644 Lib/importlib/resources/_functional.py create mode 100644 Lib/test/test_importlib/resources/test_functional.py create mode 100644 Misc/NEWS.d/next/Library/2024-03-11-17-04-55.gh-issue-116608.30f58-.rst diff --git a/Doc/library/importlib.resources.rst b/Doc/library/importlib.resources.rst index a5adf0b8546dbfc..9a5e4c76e7bd8f1 100644 --- a/Doc/library/importlib.resources.rst +++ b/Doc/library/importlib.resources.rst @@ -97,3 +97,181 @@ for example, a package and its resources can be imported from a zip file using .. versionchanged:: 3.12 Added support for *traversable* representing a directory. + + +.. _importlib_resources_functional: + +Functional API +^^^^^^^^^^^^^^ + +A set of simplified, backwards-compatible helpers is available. +These allow common operations in a single function call. + +For all the following functions: + +- *anchor* is an :class:`~importlib.resources.Anchor`, + as in :func:`~importlib.resources.files`. + Unlike in ``files``, it may not be omitted. + +- *path_names* are components of a resource's path name, relative to + the anchor. + For example, to get the text of a resource named ``info.txt``, use:: + + importlib.resources.read_text(my_module, "info.txt") + + Like :meth:`Traversable.joinpath <importlib.resources.abc.Traversable.joinpath>`, + the individual components should use forward slashes (``/``) + as path separators. 
+ For example, the following are equivalent:: + + importlib.resources.read_binary(my_module, "pics/painting.png") + importlib.resources.read_binary(my_module, "pics", "painting.png") + + For backward compatibility reasons, functions that read text require + an explicit *encoding* argument if multiple *path_names* are given. + For example, to get the text of ``info/chapter1.txt``, use:: + + importlib.resources.read_text(my_module, "info", "chapter1.txt", + encoding='utf-8') + +.. function:: open_binary(anchor, *path_names) + + Open the named resource for binary reading. + + See :ref:`the introduction <importlib_resources_functional>` for + details on *anchor* and *path_names*. + + This function returns a :class:`~typing.BinaryIO` object, + that is, a binary stream open for reading. + + This function is roughly equivalent to:: + + files(anchor).joinpath(*path_names).open('rb') + + .. versionchanged:: 3.13 + Multiple *path_names* are accepted. + + +.. function:: open_text(anchor, *path_names, encoding='utf-8', errors='strict') + + Open the named resource for text reading. + By default, the contents are read as strict UTF-8. + + See :ref:`the introduction <importlib_resources_functional>` for + details on *anchor* and *path_names*. + *encoding* and *errors* have the same meaning as in built-in :func:`open`. + + For backward compatibility reasons, the *encoding* argument must be given + explicitly if there are multiple *path_names*. + This limitation is scheduled to be removed in Python 3.15. + + This function returns a :class:`~typing.TextIO` object, + that is, a text stream open for reading. + + This function is roughly equivalent to:: + + files(anchor).joinpath(*path_names).open('r', encoding=encoding) + + .. versionchanged:: 3.13 + Multiple *path_names* are accepted. + *encoding* and *errors* must be given as keyword arguments. + + +.. function:: read_binary(anchor, *path_names) + + Read and return the contents of the named resource as :class:`bytes`. + + See :ref:`the introduction <importlib_resources_functional>` for + details on *anchor* and *path_names*. 
+ + This function is roughly equivalent to:: + + files(anchor).joinpath(*path_names).read_bytes() + + .. versionchanged:: 3.13 + Multiple *path_names* are accepted. + + +.. function:: read_text(anchor, *path_names, encoding='utf-8', errors='strict') + + Read and return the contents of the named resource as :class:`str`. + By default, the contents are read as strict UTF-8. + + See :ref:`the introduction <importlib_resources_functional>` for + details on *anchor* and *path_names*. + *encoding* and *errors* have the same meaning as in built-in :func:`open`. + + For backward compatibility reasons, the *encoding* argument must be given + explicitly if there are multiple *path_names*. + This limitation is scheduled to be removed in Python 3.15. + + This function is roughly equivalent to:: + + files(anchor).joinpath(*path_names).read_text(encoding=encoding) + + .. versionchanged:: 3.13 + Multiple *path_names* are accepted. + *encoding* and *errors* must be given as keyword arguments. + + +.. function:: path(anchor, *path_names) + + Provides the path to the *resource* as an actual file system path. This + function returns a context manager for use in a :keyword:`with` statement. + The context manager provides a :class:`pathlib.Path` object. + + Exiting the context manager cleans up any temporary files created, e.g. + when the resource needs to be extracted from a zip file. + + For example, the :meth:`~pathlib.Path.stat` method requires + an actual file system path; it can be used like this:: + + with importlib.resources.path(anchor, "resource.txt") as fspath: + result = fspath.stat() + + See :ref:`the introduction <importlib_resources_functional>` for + details on *anchor* and *path_names*. + + This function is roughly equivalent to:: + + as_file(files(anchor).joinpath(*path_names)) + + .. versionchanged:: 3.13 + Multiple *path_names* are accepted, + so that resources in subdirectories can be addressed. + + +.. function:: is_resource(anchor, *path_names) + + Return ``True`` if the named resource exists, otherwise ``False``. 
+ This function does not consider directories to be resources. + + See :ref:`the introduction <importlib_resources_functional>` for + details on *anchor* and *path_names*. + + This function is roughly equivalent to:: + + files(anchor).joinpath(*path_names).is_file() + + .. versionchanged:: 3.13 + Multiple *path_names* are accepted. + + +.. function:: contents(anchor, *path_names) + + Return an iterable over the named items within the package or path. + The iterable returns names of resources (e.g. files) and non-resources + (e.g. directories) as :class:`str`. + The iterable does not recurse into subdirectories. + + See :ref:`the introduction <importlib_resources_functional>` for + details on *anchor* and *path_names*. + + This function is roughly equivalent to:: + + for resource in files(anchor).joinpath(*path_names).iterdir(): + yield resource.name + + .. deprecated:: 3.11 + Prefer ``iterdir()`` as above, which offers more control over the + results and richer functionality. diff --git a/Doc/whatsnew/3.13.rst b/Doc/whatsnew/3.13.rst index 7f6a86efc61bf75..99a9545dd4e5861 100644 --- a/Doc/whatsnew/3.13.rst +++ b/Doc/whatsnew/3.13.rst @@ -409,6 +409,30 @@ and only logged in :ref:`Python Development Mode ` or on :ref:`Python built on debug mode `. (Contributed by Victor Stinner in :gh:`62948`.) +importlib +--------- + +Previously deprecated :mod:`importlib.resources` functions are un-deprecated: + + * :func:`~importlib.resources.is_resource()` + * :func:`~importlib.resources.open_binary()` + * :func:`~importlib.resources.open_text()` + * :func:`~importlib.resources.path()` + * :func:`~importlib.resources.read_binary()` + * :func:`~importlib.resources.read_text()` + +All now allow for a directory (or tree) of resources, using multiple positional +arguments. + +For text-reading functions, the *encoding* and *errors* arguments must now be given as +keyword arguments. + +The :func:`~importlib.resources.contents()` function remains deprecated in favor of +the full-featured :class:`~importlib.resources.abc.Traversable` API. 
+However, there is now no plan to remove it. + +(Contributed by Petr Viktorin in :gh:`116608`.) + ipaddress --------- @@ -1357,21 +1381,6 @@ configparser importlib --------- -* Remove :mod:`importlib.resources` deprecated methods: - - * ``contents()`` - * ``is_resource()`` - * ``open_binary()`` - * ``open_text()`` - * ``path()`` - * ``read_binary()`` - * ``read_text()`` - - Use :func:`importlib.resources.files()` instead. Refer to `importlib-resources: Migrating from Legacy - `_ - for migration advice. - (Contributed by Jason R. Coombs in :gh:`106532`.) - * Remove deprecated :meth:`~object.__getitem__` access for :class:`!importlib.metadata.EntryPoint` objects. (Contributed by Jason R. Coombs in :gh:`113175`.) diff --git a/Lib/importlib/resources/__init__.py b/Lib/importlib/resources/__init__.py index ae83cd07c4d4fbc..ec4441c91161188 100644 --- a/Lib/importlib/resources/__init__.py +++ b/Lib/importlib/resources/__init__.py @@ -7,6 +7,16 @@ Anchor, ) +from ._functional import ( + contents, + is_resource, + open_binary, + open_text, + path, + read_binary, + read_text, +) + from .abc import ResourceReader @@ -16,4 +26,11 @@ 'ResourceReader', 'as_file', 'files', + 'contents', + 'is_resource', + 'open_binary', + 'open_text', + 'path', + 'read_binary', + 'read_text', ] diff --git a/Lib/importlib/resources/_functional.py b/Lib/importlib/resources/_functional.py new file mode 100644 index 000000000000000..9e3ea1547d486a5 --- /dev/null +++ b/Lib/importlib/resources/_functional.py @@ -0,0 +1,85 @@ +"""Simplified function-based API for importlib.resources""" + +import warnings + +from ._common import files, as_file + + +_MISSING = object() + + +def open_binary(anchor, *path_names): + """Open for binary reading the *resource* within *package*.""" + return _get_resource(anchor, path_names).open('rb') + + +def open_text(anchor, *path_names, encoding=_MISSING, errors='strict'): + """Open for text reading the *resource* within *package*.""" + encoding = 
_get_encoding_arg(path_names, encoding) + resource = _get_resource(anchor, path_names) + return resource.open('r', encoding=encoding, errors=errors) + + +def read_binary(anchor, *path_names): + """Read and return contents of *resource* within *package* as bytes.""" + return _get_resource(anchor, path_names).read_bytes() + + +def read_text(anchor, *path_names, encoding=_MISSING, errors='strict'): + """Read and return contents of *resource* within *package* as str.""" + encoding = _get_encoding_arg(path_names, encoding) + resource = _get_resource(anchor, path_names) + return resource.read_text(encoding=encoding, errors=errors) + + +def path(anchor, *path_names): + """Return the path to the *resource* as an actual file system path.""" + return as_file(_get_resource(anchor, path_names)) + + +def is_resource(anchor, *path_names): + """Return ``True`` if the named resource is a file in the package. + + Otherwise returns ``False``. + """ + return _get_resource(anchor, path_names).is_file() + + +def contents(anchor, *path_names): + """Return an iterable over the named resources within the package. + + The iterable returns :class:`str` resources (e.g. files). + The iterable does not recurse into subdirectories. + """ + warnings.warn( + "importlib.resources.contents is deprecated. " + "Use files(anchor).iterdir() instead.", + DeprecationWarning, + stacklevel=2,  # attribute the warning to the caller of contents() + ) + return ( + resource.name + for resource + in _get_resource(anchor, path_names).iterdir() + ) + + +def _get_encoding_arg(path_names, encoding): + # For compatibility with versions where *encoding* was a positional + # argument, it needs to be given explicitly when there are multiple + # *path_names*. + # This limitation can be removed in Python 3.15. 
+ if encoding is _MISSING: + if len(path_names) > 1: + raise TypeError( + "'encoding' argument required with multiple path names", + ) + else: + return 'utf-8' + return encoding + + +def _get_resource(anchor, path_names): + if anchor is None: + raise TypeError("anchor must be module or string, got None") + return files(anchor).joinpath(*path_names) diff --git a/Lib/test/test_importlib/resources/test_functional.py b/Lib/test/test_importlib/resources/test_functional.py new file mode 100644 index 000000000000000..fd02fc7c0e7b150 --- /dev/null +++ b/Lib/test/test_importlib/resources/test_functional.py @@ -0,0 +1,219 @@ +import unittest +import os + +from test.support.warnings_helper import ignore_warnings, check_warnings + +import importlib.resources as resources + +# Since the functional API forwards to Traversable, we only test +# filesystem resources here -- not zip files, namespace packages etc. +# We do test for two kinds of Anchor, though. + + +class StringAnchorMixin: + anchor01 = 'test.test_importlib.resources.data01' + anchor02 = 'test.test_importlib.resources.data02' + + +class ModuleAnchorMixin: + from . import data01 as anchor01 + from . 
import data02 as anchor02 + + +class FunctionalAPIBase: + def _gen_resourcetxt_path_parts(self): + """Yield various names of a text file in anchor02, each in a subTest + """ + for path_parts in ( + ('subdirectory', 'subsubdir', 'resource.txt'), + ('subdirectory/subsubdir/resource.txt',), + ('subdirectory/subsubdir', 'resource.txt'), + ): + with self.subTest(path_parts=path_parts): + yield path_parts + + def test_read_text(self): + self.assertEqual( + resources.read_text(self.anchor01, 'utf-8.file'), + 'Hello, UTF-8 world!\n', + ) + self.assertEqual( + resources.read_text( + self.anchor02, 'subdirectory', 'subsubdir', 'resource.txt', + encoding='utf-8', + ), + 'a resource', + ) + for path_parts in self._gen_resourcetxt_path_parts(): + self.assertEqual( + resources.read_text( + self.anchor02, *path_parts, encoding='utf-8', + ), + 'a resource', + ) + # Use generic OSError, since e.g. attempting to read a directory can + # fail with PermissionError rather than IsADirectoryError + with self.assertRaises(OSError): + resources.read_text(self.anchor01) + with self.assertRaises(OSError): + resources.read_text(self.anchor01, 'no-such-file') + with self.assertRaises(UnicodeDecodeError): + resources.read_text(self.anchor01, 'utf-16.file') + self.assertEqual( + resources.read_text( + self.anchor01, 'binary.file', encoding='latin1', + ), + '\x00\x01\x02\x03', + ) + self.assertEqual( + resources.read_text( + self.anchor01, 'utf-16.file', + errors='backslashreplace', + ), + 'Hello, UTF-16 world!\n'.encode('utf-16').decode( + errors='backslashreplace', + ), + ) + + def test_read_binary(self): + self.assertEqual( + resources.read_binary(self.anchor01, 'utf-8.file'), + b'Hello, UTF-8 world!\n', + ) + for path_parts in self._gen_resourcetxt_path_parts(): + self.assertEqual( + resources.read_binary(self.anchor02, *path_parts), + b'a resource', + ) + + def test_open_text(self): + with resources.open_text(self.anchor01, 'utf-8.file') as f: + self.assertEqual(f.read(), 'Hello, UTF-8 
world!\n') + for path_parts in self._gen_resourcetxt_path_parts(): + with resources.open_text( + self.anchor02, *path_parts, + encoding='utf-8', + ) as f: + self.assertEqual(f.read(), 'a resource') + # Use generic OSError, since e.g. attempting to read a directory can + # fail with PermissionError rather than IsADirectoryError + with self.assertRaises(OSError): + resources.open_text(self.anchor01) + with self.assertRaises(OSError): + resources.open_text(self.anchor01, 'no-such-file') + with resources.open_text(self.anchor01, 'utf-16.file') as f: + with self.assertRaises(UnicodeDecodeError): + f.read() + with resources.open_text( + self.anchor01, 'binary.file', encoding='latin1', + ) as f: + self.assertEqual(f.read(), '\x00\x01\x02\x03') + with resources.open_text( + self.anchor01, 'utf-16.file', + errors='backslashreplace', + ) as f: + self.assertEqual( + f.read(), + 'Hello, UTF-16 world!\n'.encode('utf-16').decode( + errors='backslashreplace', + ), + ) + + def test_open_binary(self): + with resources.open_binary(self.anchor01, 'utf-8.file') as f: + self.assertEqual(f.read(), b'Hello, UTF-8 world!\n') + for path_parts in self._gen_resourcetxt_path_parts(): + with resources.open_binary( + self.anchor02, *path_parts, + ) as f: + self.assertEqual(f.read(), b'a resource') + + def test_path(self): + with resources.path(self.anchor01, 'utf-8.file') as path: + with open(str(path)) as f: + self.assertEqual(f.read(), 'Hello, UTF-8 world!\n') + with resources.path(self.anchor01) as path: + with open(os.path.join(path, 'utf-8.file')) as f: + self.assertEqual(f.read(), 'Hello, UTF-8 world!\n') + + def test_is_resource(self): + is_resource = resources.is_resource + self.assertTrue(is_resource(self.anchor01, 'utf-8.file')) + self.assertFalse(is_resource(self.anchor01, 'no_such_file')) + self.assertFalse(is_resource(self.anchor01)) + self.assertFalse(is_resource(self.anchor01, 'subdirectory')) + for path_parts in self._gen_resourcetxt_path_parts(): + 
self.assertTrue(is_resource(self.anchor02, *path_parts)) + + def test_contents(self): + is_resource = resources.is_resource + with check_warnings((".*contents.*", DeprecationWarning)): + c = resources.contents(self.anchor01) + self.assertGreaterEqual( + set(c), + {'utf-8.file', 'utf-16.file', 'binary.file', 'subdirectory'}, + ) + with ( + self.assertRaises(OSError), + check_warnings((".*contents.*", DeprecationWarning)), + ): + list(resources.contents(self.anchor01, 'utf-8.file')) + for path_parts in self._gen_resourcetxt_path_parts(): + with ( + self.assertRaises(OSError), + check_warnings((".*contents.*", DeprecationWarning)), + ): + list(resources.contents(self.anchor01, *path_parts)) + with check_warnings((".*contents.*", DeprecationWarning)): + c = resources.contents(self.anchor01, 'subdirectory') + self.assertGreaterEqual( + set(c), + {'binary.file'}, + ) + + @ignore_warnings(category=DeprecationWarning) + def test_common_errors(self): + for func in ( + resources.read_text, + resources.read_binary, + resources.open_text, + resources.open_binary, + resources.path, + resources.is_resource, + resources.contents, + ): + with self.subTest(func=func): + # Rejecting None anchor + with self.assertRaises(TypeError): + func(None) + # Rejecting invalid anchor type + with self.assertRaises((TypeError, AttributeError)): + func(1234) + # Unknown module + with self.assertRaises(ModuleNotFoundError): + func('$missing module$') + + def test_text_errors(self): + for func in ( + resources.read_text, + resources.open_text, + ): + with self.subTest(func=func): + # Multiple path arguments need explicit encoding argument. 
+ with self.assertRaises(TypeError): + func( + self.anchor02, 'subdirectory', + 'subsubdir', 'resource.txt', + ) + + +class FunctionalAPITest_StringAnchor( + unittest.TestCase, FunctionalAPIBase, StringAnchorMixin, +): + pass + + +class FunctionalAPITest_ModuleAnchor( + unittest.TestCase, FunctionalAPIBase, ModuleAnchorMixin, +): + pass diff --git a/Misc/NEWS.d/next/Library/2024-03-11-17-04-55.gh-issue-116608.30f58-.rst b/Misc/NEWS.d/next/Library/2024-03-11-17-04-55.gh-issue-116608.30f58-.rst new file mode 100644 index 000000000000000..d1536bc47c3ee04 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2024-03-11-17-04-55.gh-issue-116608.30f58-.rst @@ -0,0 +1,10 @@ +The :mod:`importlib.resources` functions +:func:`~importlib.resources.is_resource()`, +:func:`~importlib.resources.open_binary()`, +:func:`~importlib.resources.open_text()`, +:func:`~importlib.resources.path()`, +:func:`~importlib.resources.read_binary()`, and +:func:`~importlib.resources.read_text()` are un-deprecated, and support +subdirectories via multiple positional arguments. +The :func:`~importlib.resources.contents()` function also allows subdirectories, +but remains deprecated.