From 1176320cc9c37502a89166d7ff94d46513bf9d90 Mon Sep 17 00:00:00 2001 From: Jonathon Belotti Date: Wed, 26 May 2021 10:18:36 +1000 Subject: [PATCH 1/6] implement _includes_python_modules to help properly detect namespace pkgs --- .../extract_wheels/lib/namespace_pkgs.py | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/python/pip_install/extract_wheels/lib/namespace_pkgs.py b/python/pip_install/extract_wheels/lib/namespace_pkgs.py index ca5ffb5b13..3aae3d266e 100644 --- a/python/pip_install/extract_wheels/lib/namespace_pkgs.py +++ b/python/pip_install/extract_wheels/lib/namespace_pkgs.py @@ -1,5 +1,6 @@ """Utility functions to discover python package types""" import os +import pathlib # supported in >= 3.4 import textwrap from typing import Set, List, Optional @@ -68,3 +69,29 @@ def add_pkgutil_style_namespace_pkg_init(dir_path: str) -> None: """ ) ) + + +def _includes_python_modules(files: List[str]) -> bool: + """ + In order to only transform directories that Python actually considers namespace pkgs + we need to detect if a directory includes Python modules. + + Which files are loadable as modules is extension based, and the particular set of extensions + varies by platform. + + See: + 1. https://github.com/python/cpython/blob/7d9d25dbedfffce61fc76bc7ccbfa9ae901bf56f/Lib/importlib/machinery.py#L19 + 2. PEP 420 -- Implicit Namespace Packages, Specification - https://www.python.org/dev/peps/pep-0420/#specification + 3. dynload_shlib.c and dynload_win.c in python/cpython. + """ + module_suffixes = { + ".py", # Source modules + ".pyc", # Compiled bytecode modules + ".so", # Unix extension modules + ".pyd" # https://docs.python.org/3/faq/windows.html#is-a-pyd-file-the-same-as-a-dll + } + return any( + pathlib.Path(f).suffix in module_suffixes + for f + in files + ) From c361e789a246c81649dc8d9700d29b8a32a3d00f Mon Sep 17 00:00:00 2001 From: Jonathon Belotti Date: Sun, 6 Jun 2021 22:06:03 +1000 Subject: [PATCH 2/6] Resolve https://github.com/bazelbuild/rules_python/issues/381 (testcase additions included) --- .../extract_wheels/lib/namespace_pkgs.py | 23 ++++---- .../extract_wheels/lib/namespace_pkgs_test.py | 53 +++++++++++++++++++ 2 files changed, 65 insertions(+), 11 deletions(-) diff --git a/python/pip_install/extract_wheels/lib/namespace_pkgs.py b/python/pip_install/extract_wheels/lib/namespace_pkgs.py index 3aae3d266e..dc2c8a2b14 100644 --- a/python/pip_install/extract_wheels/lib/namespace_pkgs.py +++ b/python/pip_install/extract_wheels/lib/namespace_pkgs.py @@ -20,26 +20,27 @@ def implicit_namespace_packages( Returns: The set of directories found under root to be packages using the native namespace method. """ - namespace_pkg_dirs = set() - for dirpath, dirnames, filenames in os.walk(directory, topdown=True): + namespace_pkg_dirs: Set[str] = set() + # Traverse bottom-up because a directory can be a namespace pkg because its child contains module files. + for dirpath, dirnames, filenames in os.walk(directory, topdown=False): # We are only interested in dirs with no __init__.py file if "__init__.py" in filenames: - dirnames[:] = [] # Remove dirnames from search continue - - for ignored_dir in ignored_dirnames or []: - if ignored_dir in dirnames: - dirnames.remove(ignored_dir) - - non_empty_directory = dirnames or filenames + elif ignored_dirnames: + is_ignored_dir = dirpath in ignored_dirnames + child_of_ignored_dir = any(d in pathlib.Path(dirpath).parents for d in ignored_dirnames) + if is_ignored_dir or child_of_ignored_dir: + continue + + non_empty_package = _includes_python_modules(filenames) + parent_of_namespace_pkg = any(str(pathlib.Path(dirpath, d)) in namespace_pkg_dirs for d in dirnames) if ( - non_empty_directory + (non_empty_package or parent_of_namespace_pkg) and # The root of the directory should never be an implicit namespace dirpath != directory ): namespace_pkg_dirs.add(dirpath) - return namespace_pkg_dirs diff --git a/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py b/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py index 5eec5c3199..6f97f06613 100644 --- a/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py +++ b/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py @@ -68,6 +68,59 @@ def test_empty_case(self) -> None: actual = namespace_pkgs.implicit_namespace_packages(directory.root()) self.assertEqual(actual, set()) + def test_ignores_non_module_files_in_directories(self) -> None: + directory = TempDir() + directory.add_file("foo/__init__.pyi") + directory.add_file("foo/py.typed") + + actual = namespace_pkgs.implicit_namespace_packages(directory.root()) + self.assertEqual(actual, set()) + + def test_parent_child_relationship_of_namespace_pkgs(self): + directory = TempDir() + directory.add_file("foo/bar/biff/my_module.py") + directory.add_file("foo/bar/biff/another_module.py") + + expected = { + directory.root() + "/foo", + directory.root() + "/foo/bar", + directory.root() + "/foo/bar/biff", + } + actual = namespace_pkgs.implicit_namespace_packages(directory.root()) + self.assertEqual(actual, expected) + + def test_recognized_all_nonstandard_module_types(self): + directory = TempDir() + directory.add_file("ayy/my_module.pyc") + directory.add_file("bee/ccc/dee/eee.so") + directory.add_file("eff/jee/aych.pyd") + + expected = { + directory.root() + "/ayy", + directory.root() + "/bee", + directory.root() + "/bee/ccc", + directory.root() + "/bee/ccc/dee", + directory.root() + "/eff", + directory.root() + "/eff/jee", + } + actual = namespace_pkgs.implicit_namespace_packages(directory.root()) + self.assertEqual(actual, expected) + + def test_skips_ignored_directories(self): + directory = TempDir() + directory.add_file("foo/boo/my_module.py") + directory.add_file("foo/bar/another_module.py") + + expected = { + directory.root() + "/foo", + directory.root() + "/foo/bar", + } + actual = namespace_pkgs.implicit_namespace_packages( + directory.root(), + ignored_dirnames=[directory.root() + "/foo/boo"], + ) + self.assertEqual(actual, expected) + if __name__ == "__main__": unittest.main() From cc044074ba44a730cedab1e391b1d092b726e99b Mon Sep 17 00:00:00 2001 From: Jonathon Belotti Date: Tue, 8 Jun 2021 17:54:51 +1000 Subject: [PATCH 3/6] add failing testcases for unhandled scenario --- .../extract_wheels/lib/namespace_pkgs_test.py | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py b/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py index 6f97f06613..9342ce9bd5 100644 --- a/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py +++ b/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py @@ -89,6 +89,32 @@ def test_parent_child_relationship_of_namespace_pkgs(self): actual = namespace_pkgs.implicit_namespace_packages(directory.root()) self.assertEqual(actual, expected) + def test_parent_child_relationship_of_namespace_and_standard_pkgs(self): + directory = TempDir() + directory.add_file("foo/bar/biff/__init__.py") + directory.add_file("foo/bar/biff/another_module.py") + + expected = { + directory.root() + "/foo", + directory.root() + "/foo/bar", + } + actual = namespace_pkgs.implicit_namespace_packages(directory.root()) + self.assertEqual(actual, expected) + + def test_parent_child_relationship_of_namespace_and_nested_standard_pkgs(self): + directory = TempDir() + directory.add_file("foo/bar/__init__.py") + directory.add_file("foo/bar/biff/another_module.py") + directory.add_file("foo/bar/biff/__init__.py") + directory.add_file("foo/bar/boof/big_module.py") + directory.add_file("foo/bar/boof/__init__.py") + + expected = { + directory.root() + "/foo", + } + actual = namespace_pkgs.implicit_namespace_packages(directory.root()) + self.assertEqual(actual, expected) + def test_recognized_all_nonstandard_module_types(self): directory = TempDir() directory.add_file("ayy/my_module.pyc") From c04fdc1d104fca4ae1fa74680d8d71e22946bf82 Mon Sep 17 00:00:00 2001 From: Jonathon Belotti Date: Tue, 8 Jun 2021 18:01:30 +1000 Subject: [PATCH 4/6] handle case where ns-pkg is parent of regular pkg(s) --- python/pip_install/extract_wheels/lib/namespace_pkgs.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/python/pip_install/extract_wheels/lib/namespace_pkgs.py b/python/pip_install/extract_wheels/lib/namespace_pkgs.py index dc2c8a2b14..70c4fc731c 100644 --- a/python/pip_install/extract_wheels/lib/namespace_pkgs.py +++ b/python/pip_install/extract_wheels/lib/namespace_pkgs.py @@ -21,10 +21,11 @@ def implicit_namespace_packages( The set of directories found under root to be packages using the native namespace method. """ namespace_pkg_dirs: Set[str] = set() + standard_pkg_dirs: Set[str] = set() # Traverse bottom-up because a directory can be a namespace pkg because its child contains module files. for dirpath, dirnames, filenames in os.walk(directory, topdown=False): - # We are only interested in dirs with no __init__.py file if "__init__.py" in filenames: + standard_pkg_dirs.add(dirpath) continue elif ignored_dirnames: is_ignored_dir = dirpath in ignored_dirnames @@ -34,8 +35,10 @@ def implicit_namespace_packages( non_empty_package = _includes_python_modules(filenames) parent_of_namespace_pkg = any(str(pathlib.Path(dirpath, d)) in namespace_pkg_dirs for d in dirnames) + parent_of_standard_pkg = any(str(pathlib.Path(dirpath, d)) in standard_pkg_dirs for d in dirnames) + parent_of_pkg = parent_of_namespace_pkg or parent_of_standard_pkg if ( - (non_empty_package or parent_of_namespace_pkg) + (non_empty_package or parent_of_pkg) and # The root of the directory should never be an implicit namespace dirpath != directory From 9a332624408c57ddbbb816e9ad41785c99108f75 Mon Sep 17 00:00:00 2001 From: Jonathon Belotti Date: Tue, 8 Jun 2021 18:02:50 +1000 Subject: [PATCH 5/6] minor addition to testcase --- python/pip_install/extract_wheels/lib/namespace_pkgs_test.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py b/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py index 9342ce9bd5..baec8b3fff 100644 --- a/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py +++ b/python/pip_install/extract_wheels/lib/namespace_pkgs_test.py @@ -108,9 +108,11 @@ def test_parent_child_relationship_of_namespace_and_nested_standard_pkgs(self): directory.add_file("foo/bar/biff/__init__.py") directory.add_file("foo/bar/boof/big_module.py") directory.add_file("foo/bar/boof/__init__.py") + directory.add_file("fim/in_a_ns_pkg.py") expected = { directory.root() + "/foo", + directory.root() + "/fim", } actual = namespace_pkgs.implicit_namespace_packages(directory.root()) self.assertEqual(actual, expected) From fffaa0661fe359703f0f652ed498dbeceea0c6ae Mon Sep 17 00:00:00 2001 From: Jonathon Belotti Date: Tue, 8 Jun 2021 18:07:18 +1000 Subject: [PATCH 6/6] Rename var --- python/pip_install/extract_wheels/lib/namespace_pkgs.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pip_install/extract_wheels/lib/namespace_pkgs.py b/python/pip_install/extract_wheels/lib/namespace_pkgs.py index 70c4fc731c..30729fe132 100644 --- a/python/pip_install/extract_wheels/lib/namespace_pkgs.py +++ b/python/pip_install/extract_wheels/lib/namespace_pkgs.py @@ -33,12 +33,12 @@ def implicit_namespace_packages( if is_ignored_dir or child_of_ignored_dir: continue - non_empty_package = _includes_python_modules(filenames) + dir_includes_py_modules = _includes_python_modules(filenames) parent_of_namespace_pkg = any(str(pathlib.Path(dirpath, d)) in namespace_pkg_dirs for d in dirnames) parent_of_standard_pkg = any(str(pathlib.Path(dirpath, d)) in standard_pkg_dirs for d in dirnames) parent_of_pkg = parent_of_namespace_pkg or parent_of_standard_pkg if ( - (non_empty_package or parent_of_pkg) + (dir_includes_py_modules or parent_of_pkg) and # The root of the directory should never be an implicit namespace dirpath != directory