Skip to content

Commit

Permalink
Merge 818a9cb into 095de01
Browse files Browse the repository at this point in the history
  • Loading branch information
amol- committed Jun 13, 2023
2 parents 095de01 + 818a9cb commit 62822a0
Show file tree
Hide file tree
Showing 13 changed files with 496 additions and 32 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ jobs:
strategy:
fail-fast: false
matrix:
os: [ubuntu-latest, macos-latest]
os: [ubuntu-latest, macos-latest, windows-latest]
python: ["3.7", "3.8", "3.9", "3.10", "3.11"]
runs-on: ${{ matrix.os }}
steps:
Expand Down
21 changes: 21 additions & 0 deletions README.rst
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,13 @@ consolidatewheels
.. image:: https://img.shields.io/pypi/l/consolidatewheels.svg
:target: https://pypi.python.org/pypi/consolidatewheels

Supported Platforms
-------------------

* Linux
* OSX
* Windows

Introduction
------------

Expand Down Expand Up @@ -80,13 +87,27 @@ After ``consolidatewheels`` is used, the final result would be::
which would work correctly as long as ``libone`` is imported _before_ ``libtwo``, as they will
both look for ``libfoo-ef63151d.so`` which was loaded already by ``libone``.

Linux Support
~~~~~~~~~~~~~

``consolidatewheels`` works also in conjunction with ``auditwheel``, consolidating all libraries
embedded by ``auditwheel``. But on Linux duplicates won't be removed automatically, so
you need to make sure to use ``auditwheel --exclude`` to ensure libraries are not embedded twice.

OSX Support
~~~~~~~~~~~

``consolidatewheels`` works also in conjunction with ``delocate``, consolidating all libraries
embedded by ``delocate`` and removing duplicates of the embedded libraries when they are provided
in multiple wheels.

Windows Support
~~~~~~~~~~~~~~~

``consolidatewheels`` works also in conjunction with ``delvewheel``, consolidating all libraries
embedded by ``delvewheel`` and removing duplicates of the embedded libraries when they are provided
in multiple wheels.

Install
-------

Expand Down
131 changes: 131 additions & 0 deletions consolidatewheels/consolidate_win.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
from __future__ import annotations

import os
import pathlib
import tempfile

import pefile

from .wheelsfunc import packwheels, unpackwheels


def consolidate(wheels: list[str], destdir: str) -> None:
    """Consolidate shared objects references within multiple wheels.

    Given a list of wheels, makes sure that they all share the
    same marshaling of libraries names when those libraries aren't
    already included in the wheel itself.

    The resulting new wheels are written into ``destdir``.
    """
    absolute_wheels = [os.path.abspath(wheel) for wheel in wheels]
    with tempfile.TemporaryDirectory() as workdir:
        print(f"Consolidate, Working inside {workdir}")
        # Unpack every wheel, compute one shared mangling map across all
        # of them, rewrite the DLL imports accordingly and repack.
        unpacked_wheeldirs = unpackwheels(absolute_wheels, workdir=workdir)
        shared_mangling = buildlibmap(unpacked_wheeldirs)
        print(f"Applying consistent mangling: {shared_mangling}")
        patch_wheeldirs(unpacked_wheeldirs, shared_mangling)
        packwheels(unpacked_wheeldirs, destdir)


def patch_wheeldirs(wheeldirs: list[str], mangling_map: dict[str, str]) -> None:
    """Provided a mapping of mangled library names, apply the mangling to all wheels.

    This traverses the content of all provided wheel directories
    looking for .dll files. For every file, will patch the file dependencies
    so that they look for the mangled version of the library instead of
    the unmangled one.

    Note that this takes for granted that all libraries were mangled by
    delvewheel and deduped by the dedupe step.

    :param wheeldirs: directories of unpacked wheels, patched in place.
    :param mangling_map: maps a demangled dll name to the mangled
        name that every wheel should converge on.
    :raises RuntimeError: if rewriting an import entry of a dll fails.
    """
    for wheeldir in wheeldirs:
        for lib_to_patch_path in pathlib.Path(wheeldir).rglob("*.dll"):
            lib_to_patch = str(lib_to_patch_path)
            print(f"Patching {lib_to_patch}")
            imports = _get_dll_imports(lib_to_patch)
            for lib_to_replace in imports:
                demangled_libname = demangle_libname(lib_to_replace)
                updated_libname = mangling_map.get(demangled_libname)
                if updated_libname is None:
                    # Library wasn't embedded into the wheel
                    continue
                print(f" {lib_to_replace} -> {updated_libname}")
                if not _patch_dll(
                    lib_to_replace,
                    updated_libname,
                    lib_to_patch,
                ):
                    raise RuntimeError(
                        f"Unable to apply mangling to {lib_to_patch}, "
                        f"{lib_to_replace}->{updated_libname}"
                    )


def _get_dll_imports(lib_to_patch: str) -> list[str]:
    """Return the names of all DLLs that ``lib_to_patch`` depends on."""
    with pefile.PE(lib_to_patch) as dlllib:
        return [
            entry.dll.decode("utf-8") for entry in dlllib.DIRECTORY_ENTRY_IMPORT
        ]


def _patch_dll(lib_to_replace: str, lib_replacement: str, lib_to_patch: str) -> bool:
    """Patch lib_to_patch replacing the name of a dependency.

    Rewrites the import directory entries of ``lib_to_patch`` so that
    every dependency named ``lib_to_replace`` points to
    ``lib_replacement`` instead, then writes the file back in place.

    :param lib_to_replace: current (mangled) dependency name to look for.
    :param lib_replacement: new dependency name to write in its place.
    :param lib_to_patch: path of the dll to patch.
    :return: ``True`` on success, ``False`` if pefile refused to write
        the replacement name at the existing RVA.
    """
    dlllib = pefile.PE(lib_to_patch)
    try:
        for entry in dlllib.DIRECTORY_ENTRY_IMPORT:
            if entry.dll.decode("utf-8") == lib_to_replace:
                # The new name is written in place over the old one,
                # NUL terminated, so it must fit at the existing RVA.
                if not dlllib.set_bytes_at_rva(
                    entry.struct.Name, lib_replacement.encode("ascii") + b"\0"
                ):
                    return False
        dlllib.merge_modified_section_data()
    finally:
        # Unclear how well PE behaves when closing it before writing it back
        # but if we don't close it, we get an error that the file is already in use.
        dlllib.close()
    dlllib.write(lib_to_patch)
    return True


def buildlibmap(wheeldirs: list[str]) -> dict[str, str]:
    """Compute how libraries embedded by delvewheel should be mangled.

    Across multiple wheel directories, find all the libraries that
    have been embedded by delvewheel and build a map of what mangling
    should be applied to each DLL.

    Report an error if the same directory has multiple possible mangling,
    this will usually signal that dedupe didn't run correctly when
    using consolidatewheels.
    """
    first_seen_at: dict[str, str] = {}
    mangling: dict[str, str] = {}
    for wheeldir in wheeldirs:
        for libpath in pathlib.Path(wheeldir).rglob("*.libs/*.dll"):
            plain_name = demangle_libname(libpath.name)
            if plain_name in mangling:
                # Two wheels embed the same library under different
                # mangled names: there is no single consistent choice.
                previous_location = first_seen_at[plain_name]
                raise ValueError(
                    f"Library {plain_name} appears multiple times: "
                    f"{previous_location}, {libpath}. "
                    "Did dedupe step run?"
                )
            mangling[plain_name] = libpath.name
            first_seen_at[plain_name] = str(libpath)
    return mangling


def demangle_libname(libfilename: str) -> str:
    """Strip the mangling hash from a library file name.

    ``libfoo-ef63151d.dll`` becomes ``libfoo.dll``; a name without a
    ``-`` separator is returned unchanged.
    """
    stem, extension = os.path.splitext(libfilename)
    head, separator, _hash = stem.rpartition("-")
    demangled = head if separator else stem
    return f"{demangled}{extension}"
33 changes: 26 additions & 7 deletions consolidatewheels/dedupe.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
from . import wheelsfunc


def dedupe(wheels: list[str], destdir: str) -> list[str]:
def dedupe(wheels: list[str], destdir: str, mangled: bool = False) -> list[str]:
"""Given a list of wheels remove duplicated libraries
This searches .dylibs embedded by delocate for libraries
Expand All @@ -26,7 +26,7 @@ def dedupe(wheels: list[str], destdir: str) -> list[str]:
with tempfile.TemporaryDirectory() as tmpcd:
print(f"Dedupe, Working inside {tmpcd}")
wheeldirs = wheelsfunc.unpackwheels(wheels, workdir=tmpcd)
delete_duplicate_libs(wheeldirs)
delete_duplicate_libs(wheeldirs, mangled)
wheels = wheelsfunc.packwheels(wheeldirs, destdir)
return wheels

Expand Down Expand Up @@ -96,14 +96,16 @@ def sort_dependencies(deptree: dict[str, list[str]]) -> list[str]:
return result


def delete_duplicate_libs(wheeldirs: list[str]) -> None:
def delete_duplicate_libs(wheeldirs: list[str], mangled: bool) -> None:
"""Given directories of unpacked wheels, preserve one copy of embedded libs.
Deletes embedded libraries if they are provided by multiple wheels,
only the first encountered lib is preserved.
This only works if embedded libraries are not marshalled or
they are marshalled with consistent naming.
mangled=True tries to make this work for mangled lib names.
This takes for granted that libraries have been mangled with
libname-HASH.ext, which is what auditwheel and dwelvewheel
currently do.
Right now delocate on OSX doesn't apply any marshaling to file names,
and thus this works correctly. Auditwheel currently seems to work
Expand All @@ -117,9 +119,26 @@ def delete_duplicate_libs(wheeldirs: list[str]) -> None:
for lib in itertools.chain(
pathlib.Path(wheeldir).rglob(".dylibs/*"),
pathlib.Path(wheeldir).rglob("*.libs/*.so"),
pathlib.Path(wheeldir).rglob("*.dll"),
):
libname = lib.name
if mangled:
libname = lib.name.split("-", 1)[0]
else:
libname = lib.name
if libname in already_seen:
print(f"Removing {libname} as already provided by another wheel.")
print(
f"Removing {lib.name} in {wheeldir} "
"as already provided by another wheel."
)
lib.unlink()

# On Windows we also have to remove the entry from
# load-order generated by delvewheel
for load_order in pathlib.Path(lib.parent).rglob(".load-order-*"):
with load_order.open() as load_order_f:
embedded_libs = load_order_f.readlines()
with load_order.open("w") as load_order_f:
for embedded_lib in embedded_libs:
if embedded_lib.strip() != lib.name:
load_order_f.write(embedded_lib)
already_seen.add(libname)
32 changes: 25 additions & 7 deletions consolidatewheels/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,26 +2,38 @@

import argparse
import os
import platform
import shutil
import subprocess
import sys
import tempfile

from . import consolidate_linux, consolidate_osx, dedupe
from . import consolidate_linux, consolidate_osx, consolidate_win, dedupe


def main() -> int:
"""Main entry point of the command line tool.
Executes consolidatewheels and returns the exit code.
"""
detected_system = platform.system().lower()

if not requirements_satisfied():
return 1

opts = parse_options()
if sys.platform == "linux":
if detected_system == "linux":
consolidate_linux.consolidate(opts.wheels, opts.dest)
elif sys.platform == "darwin":
elif detected_system == "windows":
# On Windows, we need to include all libraries
# so that they get mangled and reserve the right
# size in the IMPORTS section of the DLL to account for
# the mangling hash. That way we can then replace the hash
# without risk of overflowing.
# dedupe will take care that they don't appear twice.
with tempfile.TemporaryDirectory() as dedupedir:
wheels = dedupe.dedupe(opts.wheels, dedupedir, mangled=True)
consolidate_win.consolidate(wheels, opts.dest)
elif detected_system == "darwin":
# On Mac, delocate does not mangle library names,
# but there is no --exclude option,
# so we just have to remove the extra lib.
Expand Down Expand Up @@ -70,15 +82,17 @@ def requirements_satisfied() -> bool:
Returns ``False`` is the requirements are not satisfied.
"""
if sys.platform == "darwin":
detected_system = platform.system().lower()

if detected_system == "darwin":
if not shutil.which("install_name_tool"):
print("Cannot find required utility `install_name_tool` in PATH")
return False

if not shutil.which("codesign"):
print("Cannot find required utility `codesign` in PATH")
return False
elif sys.platform == "linux":
elif detected_system == "linux":
# Ensure that patchelf exists and we can use it.
if not shutil.which("patchelf"):
print("Cannot find required utility `patchelf` in PATH")
Expand All @@ -89,8 +103,12 @@ def requirements_satisfied() -> bool:
except subprocess.CalledProcessError:
print("Could not call `patchelf` binary")
return False
elif detected_system == "windows":
# At the moment there are no system dependencies required.
pass
else:
print("Error: This tool only supports Linux and MacOSX")
print("Error: This tool only supports Linux, MacOSX and Windows")
print("Detected System:", detected_system)
return False

# All requirements are in place, that's good!
Expand Down
5 changes: 5 additions & 0 deletions consolidatewheels/wheelsfunc.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,11 @@ def packwheels(wheeldirs: list[str], destdir: str) -> list[str]:

# This is a bit of an hack to preserve order of directories
wheel = os.listdir(tmpdir)[0]
expected_dest_file = os.path.join(destdir, wheel)
if os.path.exists(expected_dest_file):
# This is required by windows as it is unable to
# overwrite the existing file.
os.unlink(expected_dest_file)
shutil.move(os.path.join(tmpdir, wheel), destdir)
resulting_wheels.append(os.path.join(destdir, wheel))
return resulting_wheels
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,8 @@ classifiers = [
]
dependencies = [
"wheel",
"pkginfo"
"pkginfo",
"pefile"
]

[project.optional-dependencies]
Expand Down
Binary file not shown.
Binary file not shown.

0 comments on commit 62822a0

Please sign in to comment.