Skip to content

Commit

Permalink
feat(pip_parse): support patching 'whl_library'
Browse files Browse the repository at this point in the history
Previously, users had to rely on patching the actual wheel files and
uploading them as different versions to internal artifact stores if they
needed to modify the wheel dependencies. This is very common when
breaking dependency cycles in `pytorch` or `apache-airflow` packages.
With this feature we can support patching external PyPI dependencies via
unified patches passed into the `pip.whl_mods` extension and the legacy
`package_annotation` macro.

Fixes #1076.
  • Loading branch information
aignas committed Aug 29, 2023
1 parent 9818a60 commit f1481ba
Show file tree
Hide file tree
Showing 11 changed files with 92 additions and 13 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@ A brief description of the categories of changes:
[`py_console_script_binary`](./docs/py_console_script_binary.md), which
allows adding custom dependencies to a package's entry points and customizing
the `py_binary` rule used to build it.
* (whl_mods, package_annotation) Added patching support via `patches` and
`patch_strip` arguments.

### Removed

Expand Down
4 changes: 3 additions & 1 deletion docs/pip.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 3 additions & 1 deletion docs/pip_repository.md

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

9 changes: 9 additions & 0 deletions examples/bzlmod/MODULE.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,15 @@ pip.whl_mods(
additive_build_content_file = "//whl_mods:appended_build_content.BUILD",
data = [":generated_file"],
hub_name = "whl_mods_hub",
patch_strip = 1,
# You can also pass in unified patches that will be applied at the end of
# the `whl_library` so that you can use it as an alternative to other
# whl_mods features and/or modify the generated BUILD.bazel content.
#
# The 'patches' list must consist of valid labels to files in the root module.
patches = [
"//patches:empty.patch",
],
whl_name = "requests",
)

Expand Down
4 changes: 4 additions & 0 deletions examples/bzlmod/patches/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Export every `*.patch` file in this package with public visibility so that
# it can be referenced by label (e.g. "//patches:empty.patch") from the
# `pip.whl_mods` call in MODULE.bazel.
exports_files(
srcs = glob(["*.patch"]),
visibility = ["//visibility:public"],
)
Empty file.
9 changes: 9 additions & 0 deletions examples/pip_repository_annotations/WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,15 @@ write_file(
)
""",
data = [":generated_file"],
patch_strip = 1,
patches = [
# NOTE: all patches need to be passed in as non-ambiguous label strings so that
# they can later be converted to a `Label` and still point to the same target.
#
# You can either use the shorthand "@//patches:empty.patch" or the full string
# "@pip_repository_annotations_example//patches:empty.patch".
"@//patches:empty.patch",
],
),
"wheel": package_annotation(
additive_build_content = """\
Expand Down
4 changes: 4 additions & 0 deletions examples/pip_repository_annotations/patches/BUILD.bazel
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
# Export every `*.patch` file in this package with public visibility so that
# it can be referenced by label (e.g. "@//patches:empty.patch") from the
# `package_annotation` calls in the WORKSPACE file.
exports_files(
srcs = glob(["*.patch"]),
visibility = ["//visibility:public"],
)
Empty file.
33 changes: 25 additions & 8 deletions python/extensions/pip.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,24 @@ You cannot use both the additive_build_content and additive_build_content_file a
elif mods.additive_build_content_file != None:
build_content = mctx.read(mods.additive_build_content_file)

whl_mods[whl_name] = json.encode(struct(
additive_build_content = build_content,
copy_files = mods.copy_files,
copy_executables = mods.copy_executables,
data = mods.data,
data_exclude_glob = mods.data_exclude_glob,
srcs_exclude_glob = mods.srcs_exclude_glob,
))
whl_mods[whl_name] = json.encode(
struct(
additive_build_content = build_content,
copy_files = mods.copy_files,
copy_executables = mods.copy_executables,
data = mods.data,
data_exclude_glob = mods.data_exclude_glob,
srcs_exclude_glob = mods.srcs_exclude_glob,
patches = None if not mods.patches else {
# NOTE @aignas 2023-08-28: we read the patch contents here so that they
# can be used in the third-party repo, as it seems that we cannot use
# a file from the root module to patch the whl_library.
p.basename: mctx.read(p)
for p in [mctx.path(p) for p in mods.patches]
},
patch_strip = mods.patch_strip,
),
)

_whl_mods_repo(
name = hub_name,
Expand Down Expand Up @@ -416,6 +426,13 @@ cannot have a child module that uses the same `hub_name`.
""",
mandatory = True,
),
"patch_strip": attr.int(
doc = "Strip the specified number of leading components from file names. Applies to all patches.",
default = 0,
),
"patches": attr.label_list(
doc = "Patches to be applied after generating BUILD.bazel files and extracting the .whl file",
),
"srcs_exclude_glob": attr.string_list(
doc = """\
(list, optional): A list of labels to add as `srcs` to the generated
Expand Down
36 changes: 33 additions & 3 deletions python/pip_install/pip_repository.bzl
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,17 @@ def _pip_repository_impl(rctx):
annotations = {}
for pkg, annotation in rctx.attr.annotations.items():
filename = "{}.annotation.json".format(normalize_name(pkg))
rctx.file(filename, json.encode_indent(json.decode(annotation)))
decoded_annotation = json.decode(annotation)
if decoded_annotation["patches"]:
decoded_annotation["patches"] = {
# NOTE @aignas 2023-08-28: we read the patch contents here so that they
# can be used in the third-party repo, as it seems that we cannot use
# a file from the root module to patch the whl_library.
p.basename: rctx.read(p)
for p in [rctx.path(Label(p)) for p in decoded_annotation["patches"]]
}

rctx.file(filename, json.encode_indent(decoded_annotation))
annotations[pkg] = "@{name}//:{filename}".format(name = rctx.attr.name, filename = filename)

tokenized_options = []
Expand Down Expand Up @@ -661,6 +671,11 @@ def _whl_library_impl(rctx):
)
entry_points[entry_point_without_py] = entry_point_script_name

annotation = None
if rctx.attr.annotation:
json_contents = json.decode(rctx.read(rctx.attr.annotation))
annotation = struct(**json_contents)

build_file_contents = generate_whl_library_build_bazel(
repo_prefix = rctx.attr.repo_prefix,
dependencies = metadata["deps"],
Expand All @@ -670,9 +685,16 @@ def _whl_library_impl(rctx):
"pypi_version=" + metadata["version"],
],
entry_points = entry_points,
annotation = None if not rctx.attr.annotation else struct(**json.decode(rctx.read(rctx.attr.annotation))),
annotation = annotation,
)
rctx.file("BUILD.bazel", build_file_contents)
if annotation and annotation.patches:
# We are using `bzlmod` and the patches are embedded in the annotation.
for patch_name, patch in annotation.patches.items():
# First write the patch to the directory
patch_name = "xrules_python_patch_" + patch_name
rctx.file(patch_name, patch)
rctx.patch("./" + patch_name, strip = annotation.patch_strip)

return

Expand Down Expand Up @@ -739,7 +761,9 @@ def package_annotation(
copy_executables = {},
data = [],
data_exclude_glob = [],
srcs_exclude_glob = []):
srcs_exclude_glob = [],
patches = None,
patch_strip = None):
"""Annotations to apply to the BUILD file content from package generated from a `pip_repository` rule.
[cf]: https://github.com/bazelbuild/bazel-skylib/blob/main/docs/copy_file_doc.md
Expand All @@ -754,6 +778,10 @@ def package_annotation(
data_exclude_glob (list, optional): A list of exclude glob patterns to add as `data` to the generated
`py_library` target.
srcs_exclude_glob (list, optional): A list of exclude glob patterns to add as `srcs` to the generated `py_library` target.
patches (list, optional): A list of labels for the patches to be applied at the end of `whl_library`
extraction. Supports patching BUILD.bazel files.
patch_strip (int, optional): Strip the specified number of leading components from file names.
Applies to all patches.
Returns:
str: A json encoded string of the provided content.
Expand All @@ -765,6 +793,8 @@ def package_annotation(
data = data,
data_exclude_glob = data_exclude_glob,
srcs_exclude_glob = srcs_exclude_glob,
patches = patches if not patches else [str(Label(p)) for p in patches],
patch_strip = patch_strip,
))

# pip_repository implementation
Expand Down

0 comments on commit f1481ba

Please sign in to comment.