Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
53 changes: 42 additions & 11 deletions python/pip_install/extract_wheels/lib/namespace_pkgs.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
"""Utility functions to discover python package types"""
import os
import pathlib # supported in >= 3.4
import textwrap
from typing import Set, List, Optional

Expand All @@ -19,26 +20,30 @@ def implicit_namespace_packages(
Returns:
The set of directories found under root to be packages using the native namespace method.
"""
namespace_pkg_dirs = set()
for dirpath, dirnames, filenames in os.walk(directory, topdown=True):
# We are only interested in dirs with no __init__.py file
namespace_pkg_dirs: Set[str] = set()
standard_pkg_dirs: Set[str] = set()
# Traverse bottom-up: a directory with no modules of its own is still a namespace pkg when one of its children is a package, so children must be classified before their parents.
for dirpath, dirnames, filenames in os.walk(directory, topdown=False):
if "__init__.py" in filenames:
dirnames[:] = [] # Remove dirnames from search
standard_pkg_dirs.add(dirpath)
continue
elif ignored_dirnames:
is_ignored_dir = dirpath in ignored_dirnames
child_of_ignored_dir = any(d in pathlib.Path(dirpath).parents for d in ignored_dirnames)
if is_ignored_dir or child_of_ignored_dir:
continue

for ignored_dir in ignored_dirnames or []:
if ignored_dir in dirnames:
dirnames.remove(ignored_dir)

non_empty_directory = dirnames or filenames
dir_includes_py_modules = _includes_python_modules(filenames)
parent_of_namespace_pkg = any(str(pathlib.Path(dirpath, d)) in namespace_pkg_dirs for d in dirnames)
parent_of_standard_pkg = any(str(pathlib.Path(dirpath, d)) in standard_pkg_dirs for d in dirnames)
parent_of_pkg = parent_of_namespace_pkg or parent_of_standard_pkg
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: if this is the only use of this variable it seems cleaner to put it in the conditional where it is used.

if (
non_empty_directory
(dir_includes_py_modules or parent_of_pkg)
and
# The root of the directory should never be an implicit namespace
dirpath != directory
):
namespace_pkg_dirs.add(dirpath)

return namespace_pkg_dirs


Expand Down Expand Up @@ -68,3 +73,29 @@ def add_pkgutil_style_namespace_pkg_init(dir_path: str) -> None:
"""
)
)


def _includes_python_modules(files: List[str]) -> bool:
"""
In order to only transform directories that Python actually considers namespace pkgs
we need to detect if a directory includes Python modules.

Which files are loadable as modules is extension based, and the particular set of extensions
varies by platform.

See:
1. https://github.com/python/cpython/blob/7d9d25dbedfffce61fc76bc7ccbfa9ae901bf56f/Lib/importlib/machinery.py#L19
2. PEP 420 -- Implicit Namespace Packages, Specification - https://www.python.org/dev/peps/pep-0420/#specification
3. dynload_shlib.c and dynload_win.c in python/cpython.
"""
module_suffixes = {
".py", # Source modules
".pyc", # Compiled bytecode modules
".so", # Unix extension modules
".pyd" # https://docs.python.org/3/faq/windows.html#is-a-pyd-file-the-same-as-a-dll
}
return any(
pathlib.Path(f).suffix in module_suffixes
for f
in files
)
81 changes: 81 additions & 0 deletions python/pip_install/extract_wheels/lib/namespace_pkgs_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,87 @@ def test_empty_case(self) -> None:
actual = namespace_pkgs.implicit_namespace_packages(directory.root())
self.assertEqual(actual, set())

def test_ignores_non_module_files_in_directories(self) -> None:
    """A directory holding only a `.pyi` stub and a `py.typed` marker yields no namespace packages."""
    tmp = TempDir()
    for relative_path in ("foo/__init__.pyi", "foo/py.typed"):
        tmp.add_file(relative_path)

    found = namespace_pkgs.implicit_namespace_packages(tmp.root())
    self.assertEqual(found, set())

def test_parent_child_relationship_of_namespace_pkgs(self):
    """A module-bearing directory and each of its ancestors below the root are all reported."""
    tmp = TempDir()
    for module_name in ("my_module.py", "another_module.py"):
        tmp.add_file("foo/bar/biff/" + module_name)

    wanted = {
        tmp.root() + sub_path
        for sub_path in ("/foo", "/foo/bar", "/foo/bar/biff")
    }
    found = namespace_pkgs.implicit_namespace_packages(tmp.root())
    self.assertEqual(found, wanted)

def test_parent_child_relationship_of_namespace_and_standard_pkgs(self):
    """Ancestors of a standard package are reported; the standard package itself is not."""
    tmp = TempDir()
    for file_name in ("__init__.py", "another_module.py"):
        tmp.add_file("foo/bar/biff/" + file_name)

    wanted = {tmp.root() + sub_path for sub_path in ("/foo", "/foo/bar")}
    found = namespace_pkgs.implicit_namespace_packages(tmp.root())
    self.assertEqual(found, wanted)

def test_parent_child_relationship_of_namespace_and_nested_standard_pkgs(self):
    """Only the namespace parent of a tree of standard packages, plus a sibling namespace dir, are reported."""
    tmp = TempDir()
    layout = (
        "foo/bar/__init__.py",
        "foo/bar/biff/another_module.py",
        "foo/bar/biff/__init__.py",
        "foo/bar/boof/big_module.py",
        "foo/bar/boof/__init__.py",
        "fim/in_a_ns_pkg.py",
    )
    for relative_path in layout:
        tmp.add_file(relative_path)

    wanted = {tmp.root() + sub_path for sub_path in ("/foo", "/fim")}
    found = namespace_pkgs.implicit_namespace_packages(tmp.root())
    self.assertEqual(found, wanted)

def test_recognized_all_nonstandard_module_types(self):
    """Directories containing `.pyc`, `.so` or `.pyd` files, and their ancestors, are reported."""
    tmp = TempDir()
    for relative_path in (
        "ayy/my_module.pyc",
        "bee/ccc/dee/eee.so",
        "eff/jee/aych.pyd",
    ):
        tmp.add_file(relative_path)

    wanted = {
        tmp.root() + sub_path
        for sub_path in (
            "/ayy",
            "/bee",
            "/bee/ccc",
            "/bee/ccc/dee",
            "/eff",
            "/eff/jee",
        )
    }
    found = namespace_pkgs.implicit_namespace_packages(tmp.root())
    self.assertEqual(found, wanted)

def test_skips_ignored_directories(self):
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Test case is not really related to these changes, but should have been added before.

directory = TempDir()
directory.add_file("foo/boo/my_module.py")
directory.add_file("foo/bar/another_module.py")

expected = {
directory.root() + "/foo",
directory.root() + "/foo/bar",
}
actual = namespace_pkgs.implicit_namespace_packages(
directory.root(),
ignored_dirnames=[directory.root() + "/foo/boo"],
)
self.assertEqual(actual, expected)


if __name__ == "__main__":
unittest.main()