diff --git a/python/pip_install/extract_wheels/lib/namespace_pkgs.py b/python/pip_install/extract_wheels/lib/namespace_pkgs.py index ca5ffb5b13..30729fe132 100644 --- a/python/pip_install/extract_wheels/lib/namespace_pkgs.py +++ b/python/pip_install/extract_wheels/lib/namespace_pkgs.py @@ -1,5 +1,6 @@ """Utility functions to discover python package types""" import os +import pathlib # supported in >= 3.4 import textwrap from typing import Set, List, Optional @@ -19,26 +20,30 @@ def implicit_namespace_packages( Returns: The set of directories found under root to be packages using the native namespace method. """ - namespace_pkg_dirs = set() - for dirpath, dirnames, filenames in os.walk(directory, topdown=True): - # We are only interested in dirs with no __init__.py file + namespace_pkg_dirs: Set[str] = set() + standard_pkg_dirs: Set[str] = set() + # Traverse bottom-up because a directory can be a namespace pkg because its child contains module files. + for dirpath, dirnames, filenames in os.walk(directory, topdown=False): if "__init__.py" in filenames: - dirnames[:] = [] # Remove dirnames from search + standard_pkg_dirs.add(dirpath) continue + elif ignored_dirnames: + is_ignored_dir = dirpath in ignored_dirnames + child_of_ignored_dir = any(d in pathlib.Path(dirpath).parents for d in ignored_dirnames) + if is_ignored_dir or child_of_ignored_dir: + continue - for ignored_dir in ignored_dirnames or []: - if ignored_dir in dirnames: - dirnames.remove(ignored_dir) - - non_empty_directory = dirnames or filenames + dir_includes_py_modules = _includes_python_modules(filenames) + parent_of_namespace_pkg = any(str(pathlib.Path(dirpath, d)) in namespace_pkg_dirs for d in dirnames) + parent_of_standard_pkg = any(str(pathlib.Path(dirpath, d)) in standard_pkg_dirs for d in dirnames) + parent_of_pkg = parent_of_namespace_pkg or parent_of_standard_pkg if ( - non_empty_directory + (dir_includes_py_modules or parent_of_pkg) and # The root of the directory should never be an implicit namespace dirpath != directory ): namespace_pkg_dirs.add(dirpath) - return namespace_pkg_dirs @@ -68,3 +73,29 @@ def add_pkgutil_style_namespace_pkg_init(dir_path: str) -> None: """ ) ) + + +def _includes_python_modules(files: List[str]) -> bool: + """ + In order to only transform directories that Python actually considers namespace pkgs + we need to detect if a directory includes Python modules. + + Which files are loadable as modules is extension based, and the particular set of extensions + varies by platform. + + See: + 1. https://github.com/python/cpython/blob/7d9d25dbedfffce61fc76bc7ccbfa9ae901bf56f/Lib/importlib/machinery.py#L19 + 2. PEP 420 -- Implicit Namespace Packages, Specification - https://www.python.org/dev/peps/pep-0420/#specification + 3. dynload_shlib.c and dynload_win.c in python/cpython. + """ + module_suffixes = { + ".py", # Source modules + ".pyc", # Compiled bytecode modules + ".so", # Unix extension modules + ".pyd" # https://docs.python.org/3/faq/windows.html#is-a-pyd-file-the-same-as-a-dll + } + return any( + pathlib.Path(f).suffix in module_suffixes + for f + in files + ) diff --git a/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py b/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py index 5eec5c3199..baec8b3fff 100644 --- a/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py +++ b/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py @@ -68,6 +68,87 @@ def test_empty_case(self) -> None: actual = namespace_pkgs.implicit_namespace_packages(directory.root()) self.assertEqual(actual, set()) + def test_ignores_non_module_files_in_directories(self) -> None: + directory = TempDir() + directory.add_file("foo/__init__.pyi") + directory.add_file("foo/py.typed") + + actual = namespace_pkgs.implicit_namespace_packages(directory.root()) + self.assertEqual(actual, set()) + + def test_parent_child_relationship_of_namespace_pkgs(self): + directory = TempDir() + directory.add_file("foo/bar/biff/my_module.py") + directory.add_file("foo/bar/biff/another_module.py") + + expected = { + directory.root() + "/foo", + directory.root() + "/foo/bar", + directory.root() + "/foo/bar/biff", + } + actual = namespace_pkgs.implicit_namespace_packages(directory.root()) + self.assertEqual(actual, expected) + + def test_parent_child_relationship_of_namespace_and_standard_pkgs(self): + directory = TempDir() + directory.add_file("foo/bar/biff/__init__.py") + directory.add_file("foo/bar/biff/another_module.py") + + expected = { + directory.root() + "/foo", + directory.root() + "/foo/bar", + } + actual = namespace_pkgs.implicit_namespace_packages(directory.root()) + self.assertEqual(actual, expected) + + def test_parent_child_relationship_of_namespace_and_nested_standard_pkgs(self): + directory = TempDir() + directory.add_file("foo/bar/__init__.py") + directory.add_file("foo/bar/biff/another_module.py") + directory.add_file("foo/bar/biff/__init__.py") + directory.add_file("foo/bar/boof/big_module.py") + directory.add_file("foo/bar/boof/__init__.py") + directory.add_file("fim/in_a_ns_pkg.py") + + expected = { + directory.root() + "/foo", + directory.root() + "/fim", + } + actual = namespace_pkgs.implicit_namespace_packages(directory.root()) + self.assertEqual(actual, expected) + + def test_recognized_all_nonstandard_module_types(self): + directory = TempDir() + directory.add_file("ayy/my_module.pyc") + directory.add_file("bee/ccc/dee/eee.so") + directory.add_file("eff/jee/aych.pyd") + + expected = { + directory.root() + "/ayy", + directory.root() + "/bee", + directory.root() + "/bee/ccc", + directory.root() + "/bee/ccc/dee", + directory.root() + "/eff", + directory.root() + "/eff/jee", + } + actual = namespace_pkgs.implicit_namespace_packages(directory.root()) + self.assertEqual(actual, expected) + + def test_skips_ignored_directories(self): + directory = TempDir() + directory.add_file("foo/boo/my_module.py") + directory.add_file("foo/bar/another_module.py") + + expected = { + directory.root() + "/foo", + directory.root() + "/foo/bar", + } + actual = namespace_pkgs.implicit_namespace_packages( + directory.root(), + ignored_dirnames=[directory.root() + "/foo/boo"], + ) + self.assertEqual(actual, expected) + if __name__ == "__main__": unittest.main()