diff --git a/CHANGELOG.md b/CHANGELOG.md index ddfed3f727..0e01615107 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -31,8 +31,12 @@ A brief description of the categories of changes: * Skip aliases for unloaded toolchains. Some Python versions that don't have full platform support, and referencing their undefined repositories can break operations like `bazel query rdeps(...)`. + * Python code generated from `proto_library` with `strip_import_prefix` can be imported now. +* (py_wheel) Produce deterministic wheel files and make `RECORD` file entries + follow the order of files written to the `.whl` archive. + ## [0.26.0] - 2023-10-06 ### Changed @@ -106,8 +110,6 @@ A brief description of the categories of changes: * (gazelle) Improve runfiles lookup hermeticity. -* (py_wheel) Produce deterministic wheel files - ## [0.25.0] - 2023-08-22 ### Changed diff --git a/examples/wheel/wheel_test.py b/examples/wheel/wheel_test.py index 23b1c8a145..ab7b59db39 100644 --- a/examples/wheel/wheel_test.py +++ b/examples/wheel/wheel_test.py @@ -44,7 +44,7 @@ def _get_path(self, filename): else: return path - def assertFileSha256Equal(self, filename, sha): + def assertFileSha256Equal(self, filename, want): hash = hashlib.sha256() with open(filename, "rb") as f: while True: @@ -52,7 +52,7 @@ def assertFileSha256Equal(self, filename, sha): if not buf: break hash.update(buf) - self.assertEqual(hash.hexdigest(), sha) + self.assertEqual(want, hash.hexdigest()) def assertAllEntriesHasReproducibleMetadata(self, zf): for zinfo in zf.infolist(): @@ -78,7 +78,7 @@ def test_py_library_wheel(self): ], ) self.assertFileSha256Equal( - filename, "6da8e06a3fdd9ae5ee9fa8f796610723c05a4b0d7fde0ec5179401e956204139" + filename, "2818e70fdebd148934f41820f8c54d5d7676d783c0d66c7c8af2ee9141e7ddc7" ) def test_py_package_wheel(self): @@ -100,7 +100,7 @@ def test_py_package_wheel(self): ], ) self.assertFileSha256Equal( - filename, "2948b0b5e0aa421e0b40f78b74018bbc2f218165f211da0a4609e431e8e52bee" + filename, "273e27adf9bf90287a42ac911dcece8aa95f2905c37d786725477b26de23627c" ) def test_customized_wheel(self): @@ -135,16 +135,16 @@ def test_customized_wheel(self): record_contents, # The entries are guaranteed to be sorted. b"""\ -example_customized-0.0.1.dist-info/METADATA,sha256=QYQcDJFQSIqan8eiXqL67bqsUfgEAwf2hoK_Lgi1S-0,559 -example_customized-0.0.1.dist-info/NOTICE,sha256=Xpdw-FXET1IRgZ_wTkx1YQfo1-alET0FVf6V1LXO4js,76 -example_customized-0.0.1.dist-info/README,sha256=WmOFwZ3Jga1bHG3JiGRsUheb4UbLffUxyTdHczS27-o,40 -example_customized-0.0.1.dist-info/RECORD,, -example_customized-0.0.1.dist-info/WHEEL,sha256=sobxWSyDDkdg_rinUth-jxhXHqoNqlmNMJY3aTZn2Us,91 -example_customized-0.0.1.dist-info/entry_points.txt,sha256=pqzpbQ8MMorrJ3Jp0ntmpZcuvfByyqzMXXi2UujuXD0,137 examples/wheel/lib/data.txt,sha256=9vJKEdfLu8bZRArKLroPZJh1XKkK3qFMXiM79MBL2Sg,12 examples/wheel/lib/module_with_data.py,sha256=8s0Khhcqz3yVsBKv2IB5u4l4TMKh7-c_V6p65WVHPms,637 examples/wheel/lib/simple_module.py,sha256=z2hwciab_XPNIBNH8B1Q5fYgnJvQTeYf0ZQJpY8yLLY,637 examples/wheel/main.py,sha256=sgg5iWN_9inYBjm6_Zw27hYdmo-l24fA-2rfphT-IlY,909 +example_customized-0.0.1.dist-info/WHEEL,sha256=sobxWSyDDkdg_rinUth-jxhXHqoNqlmNMJY3aTZn2Us,91 +example_customized-0.0.1.dist-info/METADATA,sha256=QYQcDJFQSIqan8eiXqL67bqsUfgEAwf2hoK_Lgi1S-0,559 +example_customized-0.0.1.dist-info/entry_points.txt,sha256=pqzpbQ8MMorrJ3Jp0ntmpZcuvfByyqzMXXi2UujuXD0,137 +example_customized-0.0.1.dist-info/NOTICE,sha256=Xpdw-FXET1IRgZ_wTkx1YQfo1-alET0FVf6V1LXO4js,76 +example_customized-0.0.1.dist-info/README,sha256=WmOFwZ3Jga1bHG3JiGRsUheb4UbLffUxyTdHczS27-o,40 +example_customized-0.0.1.dist-info/RECORD,, """, ) self.assertEqual( @@ -189,7 +189,7 @@ def test_customized_wheel(self): second = second.main:s""", ) self.assertFileSha256Equal( - filename, "66f0c1bfe2cedb2f4cf08d4fe955096860186c0a2f3524e0cb02387a55ac3e63" + filename, "48eed93258bba0bb366c879b77917d947267d89e7e60005d1766d844fb909118" ) def test_legacy_filename_escaping(self): @@ -227,7 +227,7 @@ def test_legacy_filename_escaping(self): """, ) self.assertFileSha256Equal( - filename, "593c6ab58627f2446d0f1ef2956fd6d42104eedce4493c72d462f7ebf8cb74fa" + filename, "ace5fab6458f8c3b4b50801b8e8214288bba786472e81547fced743a67531312" ) def test_filename_escaping(self): @@ -293,7 +293,7 @@ def test_custom_package_root_wheel(self): for line in record_contents.splitlines(): self.assertFalse(line.startswith("/")) self.assertFileSha256Equal( - filename, "1b1fa3a4e840211084ef80049d07947b845c99bedb2778496d30e0c1524686ac" + filename, "16e0345c102c6866fed34999d8de5aed7f351adbf372b27adef3bc15161db65e" ) def test_custom_package_root_multi_prefix_wheel(self): @@ -324,7 +324,7 @@ def test_custom_package_root_multi_prefix_wheel(self): for line in record_contents.splitlines(): self.assertFalse(line.startswith("/")) self.assertFileSha256Equal( - filename, "f0422d7a338de3c76bf2525927fd93c0f47f2e9c60ecc0944e3e32b642c28137" + filename, "d2031eb21c69e290db5eac76b0dc026858e9dbdb3da2dc0314e4e9f69eab2e1a" ) def test_custom_package_root_multi_prefix_reverse_order_wheel(self): @@ -355,7 +355,7 @@ def test_custom_package_root_multi_prefix_reverse_order_wheel(self): for line in record_contents.splitlines(): self.assertFalse(line.startswith("/")) self.assertFileSha256Equal( - filename, "4f9e8c917b4050f121ac81e9a2bb65723ef09a1b90b35d93792ac3a62a60efa3" + filename, "a37b90685600ccfa56cc5405d1e9a3729ed21dfb31c76fd356e491e2af989566" ) def test_python_requires_wheel(self): @@ -380,7 +380,7 @@ def test_python_requires_wheel(self): """, ) self.assertFileSha256Equal( - filename, "9bfe8197d379f88715458a75e45c1f521a8b9d3cc43fe19b407c4ab207228b7c" + filename, "529afa454113572e6cd91f069cc9cfe5c28369f29cd495fff19d0ecce389d8e4" ) def test_python_abi3_binary_wheel(self): @@ -445,7 +445,7 @@ def test_rule_creates_directory_and_is_included_in_wheel(self): ], ) self.assertFileSha256Equal( - filename, "8ad5f639cc41ac6ac67eb70f6553a7fdecabaf3a1b952c3134eaea59610c2a64" + filename, "cc9484d527075f07651ca0e7dff4a185c1314020726bcad55fe28d1bba0fec2e" ) def test_rule_expands_workspace_status_keys_in_wheel_metadata(self): diff --git a/tools/wheelmaker.py b/tools/wheelmaker.py index f2ecbaf6ec..b051564cf2 100644 --- a/tools/wheelmaker.py +++ b/tools/wheelmaker.py @@ -84,15 +84,126 @@ def normalize_pep440(version): except packaging.version.InvalidVersion: pass - sanitized = re.sub(r'[^a-z0-9]+', '.', version.lower()).strip('.') - substituted = re.sub(r'\{\w+\}', '0', version) - delimiter = '.' if '+' in substituted else '+' + sanitized = re.sub(r"[^a-z0-9]+", ".", version.lower()).strip(".") + substituted = re.sub(r"\{\w+\}", "0", version) + delimiter = "." if "+" in substituted else "+" try: - return str( - packaging.version.Version(f'{substituted}{delimiter}{sanitized}') - ) + return str(packaging.version.Version(f"{substituted}{delimiter}{sanitized}")) except packaging.version.InvalidVersion: - return str(packaging.version.Version(f'0+{sanitized}')) + return str(packaging.version.Version(f"0+{sanitized}")) + + +class _WhlFile(zipfile.ZipFile): + def __init__( + self, + filename, + *, + mode, + distinfo_dir, + strip_path_prefixes=None, + compression=zipfile.ZIP_DEFLATED, + **kwargs, + ): + self._distinfo_dir = distinfo_dir + if not self._distinfo_dir.endswith("/"): + self._distinfo_dir += "/" + self._strip_path_prefixes = strip_path_prefixes or [] + # Entries for the RECORD file as (filename, hash, size) tuples. + self._record = [] + + super().__init__(filename, mode=mode, compression=compression, **kwargs) + + def distinfo_path(self, basename): + return self._distinfo_dir + basename + + def add_file(self, package_filename, real_filename): + """Add given file to the distribution.""" + + def arcname_from(name): + # Always use unix path separators. + normalized_arcname = name.replace(os.path.sep, "/") + # Don't manipulate names filenames in the .distinfo directory. + if normalized_arcname.startswith(self._distinfo_dir): + return normalized_arcname + for prefix in self._strip_path_prefixes: + if normalized_arcname.startswith(prefix): + return normalized_arcname[len(prefix) :] + + return normalized_arcname + + if os.path.isdir(real_filename): + directory_contents = os.listdir(real_filename) + for file_ in directory_contents: + self.add_file( + "{}/{}".format(package_filename, file_), + "{}/{}".format(real_filename, file_), + ) + return + + arcname = arcname_from(package_filename) + zinfo = self._zipinfo(arcname) + + # Write file to the zip archive while computing the hash and length + hash = hashlib.sha256() + size = 0 + with open(real_filename, "rb") as fsrc: + with self.open(zinfo, "w") as fdst: + while True: + block = fsrc.read(2**20) + if not block: + break + fdst.write(block) + hash.update(block) + size += len(block) + self._add_to_record(arcname, self._serialize_digest(hash), size) + + def add_string(self, filename, contents): + """Add given 'contents' as filename to the distribution.""" + if sys.version_info[0] > 2 and isinstance(contents, str): + contents = contents.encode("utf-8", "surrogateescape") + zinfo = self._zipinfo(filename) + self.writestr(zinfo, contents) + hash = hashlib.sha256() + hash.update(contents) + self._add_to_record(filename, self._serialize_digest(hash), len(contents)) + + def _serialize_digest(self, hash): + # https://www.python.org/dev/peps/pep-0376/#record + # "base64.urlsafe_b64encode(digest) with trailing = removed" + digest = base64.urlsafe_b64encode(hash.digest()) + digest = b"sha256=" + digest.rstrip(b"=") + return digest + + def _add_to_record(self, filename, hash, size): + size = str(size).encode("ascii") + self._record.append((filename, hash, size)) + + def _zipinfo(self, filename): + """Construct deterministic ZipInfo entry for a file named filename""" + # Strip leading path separators to mirror ZipInfo.from_file behavior + separators = os.path.sep + if os.path.altsep is not None: + separators += os.path.altsep + arcname = filename.lstrip(separators) + + zinfo = zipfile.ZipInfo(filename=arcname, date_time=_ZIP_EPOCH) + zinfo.create_system = 3 # ZipInfo entry created on a unix-y system + zinfo.external_attr = 0o777 << 16 # permissions: rwxrwxrwx + zinfo.compress_type = self.compression + return zinfo + + def add_recordfile(self): + """Write RECORD file to the distribution.""" + record_path = self.distinfo_path("RECORD") + entries = self._record + [(record_path, b"", b"")] + contents = b"" + for filename, digest, size in entries: + if sys.version_info[0] > 2 and isinstance(filename, str): + filename = filename.lstrip("/").encode("utf-8", "surrogateescape") + contents += b"%s,%s,%s\n" % (filename, digest, size) + + self.add_string(record_path, contents) + return contents class WheelMaker(object): @@ -116,9 +227,7 @@ def __init__( self._abi = abi self._platform = platform self._outfile = outfile - self._strip_path_prefixes = ( - strip_path_prefixes if strip_path_prefixes is not None else [] - ) + self._strip_path_prefixes = strip_path_prefixes if incompatible_normalize_version: self._version = normalize_pep440(self._version) @@ -144,19 +253,20 @@ def __init__( ) self._wheelname_fragment_distribution_name = self._name - self._zipfile = None - # Entries for the RECORD file as (filename, hash, size) tuples. - self._record = [] + self._whlfile = None def __enter__(self): - self._zipfile = zipfile.ZipFile( - self.filename(), mode="w", compression=zipfile.ZIP_DEFLATED + self._whlfile = _WhlFile( + self.filename(), + mode="w", + distinfo_dir=self._distinfo_dir, + strip_path_prefixes=self._strip_path_prefixes, ) return self def __exit__(self, type, value, traceback): - self._zipfile.close() - self._zipfile = None + self._whlfile.close() + self._whlfile = None def wheelname(self) -> str: components = [ @@ -177,79 +287,11 @@ def disttags(self): return ["-".join([self._python_tag, self._abi, self._platform])] def distinfo_path(self, basename): - return self._distinfo_dir + basename - - def _serialize_digest(self, hash): - # https://www.python.org/dev/peps/pep-0376/#record - # "base64.urlsafe_b64encode(digest) with trailing = removed" - digest = base64.urlsafe_b64encode(hash.digest()) - digest = b"sha256=" + digest.rstrip(b"=") - return digest - - def add_string(self, filename, contents): - """Add given 'contents' as filename to the distribution.""" - if sys.version_info[0] > 2 and isinstance(contents, str): - contents = contents.encode("utf-8", "surrogateescape") - zinfo = self._zipinfo(filename) - self._zipfile.writestr(zinfo, contents) - hash = hashlib.sha256() - hash.update(contents) - self._add_to_record(filename, self._serialize_digest(hash), len(contents)) + return self._whlfile.distinfo_path(basename) def add_file(self, package_filename, real_filename): """Add given file to the distribution.""" - - def arcname_from(name): - # Always use unix path separators. - normalized_arcname = name.replace(os.path.sep, "/") - # Don't manipulate names filenames in the .distinfo directory. - if normalized_arcname.startswith(self._distinfo_dir): - return normalized_arcname - for prefix in self._strip_path_prefixes: - if normalized_arcname.startswith(prefix): - return normalized_arcname[len(prefix) :] - - return normalized_arcname - - if os.path.isdir(real_filename): - directory_contents = os.listdir(real_filename) - for file_ in directory_contents: - self.add_file( - "{}/{}".format(package_filename, file_), - "{}/{}".format(real_filename, file_), - ) - return - - arcname = arcname_from(package_filename) - zinfo = self._zipinfo(arcname) - - # Write file to the zip archive while computing the hash and length - hash = hashlib.sha256() - size = 0 - with open(real_filename, "rb") as fsrc: - with self._zipfile.open(zinfo, "w") as fdst: - while True: - block = fsrc.read(2**20) - if not block: - break - fdst.write(block) - hash.update(block) - size += len(block) - self._add_to_record(arcname, self._serialize_digest(hash), size) - - def _zipinfo(self, filename): - """Construct deterministic ZipInfo entry for a file named filename""" - # Strip leading path separators to mirror ZipInfo.from_file behavior - separators = os.path.sep - if os.path.altsep is not None: - separators += os.path.altsep - arcname = filename.lstrip(separators) - - zinfo = zipfile.ZipInfo(filename=arcname, date_time=_ZIP_EPOCH) - zinfo.create_system = 3 # ZipInfo entry created on a unix-y system - zinfo.external_attr = 0o777 << 16 # permissions: rwxrwxrwx - zinfo.compress_type = self._zipfile.compression - return zinfo + self._whlfile.add_file(package_filename, real_filename) def add_wheelfile(self): """Write WHEEL file to the distribution""" @@ -263,7 +305,7 @@ def add_wheelfile(self): ) for tag in self.disttags(): wheel_contents += "Tag: %s\n" % tag - self.add_string(self.distinfo_path("WHEEL"), wheel_contents) + self._whlfile.add_string(self.distinfo_path("WHEEL"), wheel_contents) def add_metadata(self, metadata, name, description, version): """Write METADATA file to the distribution.""" @@ -275,23 +317,11 @@ def add_metadata(self, metadata, name, description, version): # provided. metadata += description if description else "UNKNOWN" metadata += "\n" - self.add_string(self.distinfo_path("METADATA"), metadata) + self._whlfile.add_string(self.distinfo_path("METADATA"), metadata) def add_recordfile(self): """Write RECORD file to the distribution.""" - record_path = self.distinfo_path("RECORD") - entries = self._record + [(record_path, b"", b"")] - entries.sort() - contents = b"" - for filename, digest, size in entries: - if sys.version_info[0] > 2 and isinstance(filename, str): - filename = filename.lstrip("/").encode("utf-8", "surrogateescape") - contents += b"%s,%s,%s\n" % (filename, digest, size) - self.add_string(record_path, contents) - - def _add_to_record(self, filename, hash, size): - size = str(size).encode("ascii") - self._record.append((filename, hash, size)) + self._whlfile.add_recordfile() def get_files_to_package(input_files):