From 3d6e8bd263bb7b4a686a74d895034cd6d07e788b Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Thu, 21 Dec 2023 09:45:09 +0900 Subject: [PATCH] feat(toolchain, pip.parse): introduce a new '_host' toolchain repo This is for passing it in repository_rules and relies on the canonical label representation introduced in bazel 6.0 and symlink support (needs to be present on Windows) to work. This allows the users to not need to `load` the interpreter label from a `.bzl` file but instead specify the label in the form of `@_host//:python`. Work towards #1643. --- CHANGELOG.md | 10 ++++ examples/build_file_generation/WORKSPACE | 4 +- examples/multi_python_versions/WORKSPACE | 12 ++--- examples/pip_parse/WORKSPACE | 3 +- examples/pip_parse_vendored/WORKSPACE | 3 +- examples/pip_repository_annotations/WORKSPACE | 3 +- python/private/bzlmod/pip.bzl | 6 ++- python/private/bzlmod/pythons_hub.bzl | 23 +++++++--- python/private/toolchains_repo.bzl | 46 ++++++++++++++++++- python/repositories.bzl | 8 ++++ 10 files changed, 91 insertions(+), 27 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 31e8d2561..c4cd18813 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -55,6 +55,11 @@ A brief description of the categories of changes: instead with a location to the patch that could be used to silence the warning. Copy the patch to your workspace and add it to the list if patches for the wheel file if you decide to do so. +* (bzlmod pip.parse) Use the same interpreter label that may make the lock file + almost the same for all platforms. It will, however, be different in cases where + the lock file is introducing platform-specific deps (e.g. `colorama` may be + present only in the `windows` specific requirements lock file because it + is not used elsewhere). 
### Added @@ -62,6 +67,11 @@ A brief description of the categories of changes: * (gazelle) `file` generation mode can now also add `__init__.py` to the srcs attribute for every target in the package. This is enabled through a separate directive `python_generation_mode_per_file_include_init`. +* (toolchains) `python_register_toolchains` now also generates a repository + that is suffixed with `_host`, that has a single label `:python` that is a + symlink to the python interpreter for the host platform. The intended use is + mainly in `repository_rule`, which are always run using `host` platform + Python. [0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0 diff --git a/examples/build_file_generation/WORKSPACE b/examples/build_file_generation/WORKSPACE index e283260ea..3f1fad8a8 100644 --- a/examples/build_file_generation/WORKSPACE +++ b/examples/build_file_generation/WORKSPACE @@ -84,8 +84,6 @@ python_register_toolchains( python_version = "3.9", ) -# Load the interpreter and pip_parse rules. -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "pip_parse") # This macro wraps the `pip_repository` rule that invokes `pip`, with `incremental` set. @@ -114,7 +112,7 @@ pip_parse( # 3. Wrapper script, like in the autodetecting python toolchain. # # Here, we use the interpreter constant that resolves to the host interpreter from the default Python toolchain. - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", # Set the location of the lock file. 
requirements_lock = "//:requirements_lock.txt", requirements_windows = "//:requirements_windows.txt", diff --git a/examples/multi_python_versions/WORKSPACE b/examples/multi_python_versions/WORKSPACE index 35855ca1e..f3a69ce76 100644 --- a/examples/multi_python_versions/WORKSPACE +++ b/examples/multi_python_versions/WORKSPACE @@ -28,19 +28,15 @@ python_register_multi_toolchains( ) load("@python//:pip.bzl", "multi_pip_parse") -load("@python//3.10:defs.bzl", interpreter_3_10 = "interpreter") -load("@python//3.11:defs.bzl", interpreter_3_11 = "interpreter") -load("@python//3.8:defs.bzl", interpreter_3_8 = "interpreter") -load("@python//3.9:defs.bzl", interpreter_3_9 = "interpreter") multi_pip_parse( name = "pypi", default_version = default_python_version, python_interpreter_target = { - "3.10": interpreter_3_10, - "3.11": interpreter_3_11, - "3.8": interpreter_3_8, - "3.9": interpreter_3_9, + "3.10": "@python_3_10_host//:python", + "3.11": "@python_3_11_host//:python", + "3.8": "@python_3_8_host//:python", + "3.9": "@python_3_9_host//:python", }, requirements_lock = { "3.10": "//requirements:requirements_lock_3_10.txt", diff --git a/examples/pip_parse/WORKSPACE b/examples/pip_parse/WORKSPACE index 415d064ed..1a3a6b081 100644 --- a/examples/pip_parse/WORKSPACE +++ b/examples/pip_parse/WORKSPACE @@ -14,7 +14,6 @@ python_register_toolchains( python_version = "3.9", ) -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "pip_parse") pip_parse( @@ -52,7 +51,7 @@ pip_parse( # 3. Wrapper script, like in the autodetecting python toolchain. # # Here, we use the interpreter constant that resolves to the host interpreter from the default Python toolchain. - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", # (Optional) You can set quiet to False if you want to see pip output. 
#quiet = False, diff --git a/examples/pip_parse_vendored/WORKSPACE b/examples/pip_parse_vendored/WORKSPACE index 157f70aeb..e0b7c86b6 100644 --- a/examples/pip_parse_vendored/WORKSPACE +++ b/examples/pip_parse_vendored/WORKSPACE @@ -14,14 +14,13 @@ python_register_toolchains( python_version = "3.9", ) -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "pip_parse") # This repository isn't referenced, except by our test that asserts the requirements.bzl is updated. # It also wouldn't be needed by users of this ruleset. pip_parse( name = "pip", - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", requirements_lock = "//:requirements.txt", ) diff --git a/examples/pip_repository_annotations/WORKSPACE b/examples/pip_repository_annotations/WORKSPACE index 35350550e..854055508 100644 --- a/examples/pip_repository_annotations/WORKSPACE +++ b/examples/pip_repository_annotations/WORKSPACE @@ -14,7 +14,6 @@ python_register_toolchains( python_version = "3.9", ) -load("@python39//:defs.bzl", "interpreter") load("@rules_python//python:pip.bzl", "package_annotation", "pip_parse") # Here we can see an example of annotations being applied to an arbitrary @@ -54,7 +53,7 @@ write_file( pip_parse( name = "pip", annotations = ANNOTATIONS, - python_interpreter_target = interpreter, + python_interpreter_target = "@python39_host//:python", requirements_lock = "//:requirements.txt", ) diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 6d45a26d7..f843a8fb0 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -87,8 +87,10 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): # we programmatically find it. 
hub_name = pip_attr.hub_name if python_interpreter_target == None and not pip_attr.python_interpreter: - python_name = "python_" + version_label(pip_attr.python_version, sep = "_") - if python_name not in INTERPRETER_LABELS.keys(): + python_name = "python_{}_host".format( + version_label(pip_attr.python_version, sep = "_"), + ) + if python_name not in INTERPRETER_LABELS: fail(( "Unable to find interpreter for pip hub '{hub_name}' for " + "python_version={version}: Make sure a corresponding " + diff --git a/python/private/bzlmod/pythons_hub.bzl b/python/private/bzlmod/pythons_hub.bzl index 5f536f3b6..3889e1340 100644 --- a/python/private/bzlmod/pythons_hub.bzl +++ b/python/private/bzlmod/pythons_hub.bzl @@ -78,7 +78,7 @@ DEFAULT_PYTHON_VERSION = "{default_python_version}" """ _line_for_hub_template = """\ - "{name}": Label("@{name}_{platform}//:{path}"), + "{key}": Label("@{name}_{platform}//:{path}"), """ def _hub_repo_impl(rctx): @@ -103,11 +103,22 @@ def _hub_repo_impl(rctx): # Create a dict that is later used to create # a symlink to a interpreter. - interpreter_labels = "".join([_line_for_hub_template.format( - name = name, - platform = platform, - path = path, - ) for name in rctx.attr.toolchain_user_repository_names]) + interpreter_labels = "".join([ + _line_for_hub_template.format( + key = name + ("" if platform_str != "host" else "_host"), + name = name, + platform = platform_str, + path = p, + ) + for name in rctx.attr.toolchain_user_repository_names + for platform_str, p in { + # NOTE @aignas 2023-12-21: maintaining the `platform` specific key + # here may be unneeded in the long term, but I am not sure if there + # are other users that depend on it. 
+ platform: path, + "host": "python", + }.items() + ]) rctx.file( "interpreters.bzl", diff --git a/python/private/toolchains_repo.bzl b/python/private/toolchains_repo.bzl index 4b6bd1146..c7b61780d 100644 --- a/python/private/toolchains_repo.bzl +++ b/python/private/toolchains_repo.bzl @@ -240,8 +240,50 @@ def compile_pip_requirements(name, **kwargs): toolchain_aliases = repository_rule( _toolchain_aliases_impl, - doc = """Creates a repository with a shorter name meant for the host platform, which contains - a BUILD.bazel file declaring aliases to the host platform's targets. + doc = """\ +Creates a repository with a shorter name only referencing the python version, +it contains a BUILD.bazel file declaring aliases to the host platform's targets +and is a great fit for any usage related to setting up toolchains for build +actions.""", + attrs = { + "platforms": attr.string_list( + doc = "List of platforms for which aliases shall be created", + ), + "python_version": attr.string(doc = "The Python version."), + "user_repository_name": attr.string( + mandatory = True, + doc = "The base name for all created repositories, like 'python38'.", + ), + "_rules_python_workspace": attr.label(default = Label("//:WORKSPACE")), + }, +) + +def _host_toolchain_impl(rctx): + rctx.file("BUILD.bazel", """\ +# Generated by python/private/toolchains_repo.bzl + +exports_files(["python"], visibility = ["//visibility:public"]) +""") + + (os_name, arch) = get_host_os_arch(rctx) + host_platform = get_host_platform(os_name, arch) + host_python = rctx.path( + Label( + "@@{py_repository}_{host_platform}//:python".format( + py_repository = rctx.attr.name[:-len("_host")], + host_platform = host_platform, + ), + ), + ) + rctx.symlink(host_python, "python") + +host_toolchain = repository_rule( + _host_toolchain_impl, + doc = """\ +Creates a repository with a shorter name meant to be used in the repository_ctx, +which needs to have `symlinks` for the interpreter. 
This is separate from the +toolchain_aliases repo because referencing the `python` interpreter target from +this repo causes an eager fetch of the toolchain for the host platform. """, attrs = { "platforms": attr.string_list( diff --git a/python/repositories.bzl b/python/repositories.bzl index e444c49a2..01a3b108a 100644 --- a/python/repositories.bzl +++ b/python/repositories.bzl @@ -27,6 +27,7 @@ load("//python/private:full_version.bzl", "full_version") load("//python/private:internal_config_repo.bzl", "internal_config_repo") load( "//python/private:toolchains_repo.bzl", + "host_toolchain", "multi_toolchain_aliases", "toolchain_aliases", "toolchains_repo", @@ -585,6 +586,13 @@ def python_register_toolchains( platform = platform, )) + host_toolchain( + name = name + "_host", + python_version = python_version, + user_repository_name = name, + platforms = loaded_platforms, + ) + toolchain_aliases( name = name, python_version = python_version,