diff --git a/.bazelrc b/.bazelrc
index 27e89faa97..94cfb93350 100644
--- a/.bazelrc
+++ b/.bazelrc
@@ -30,5 +30,4 @@ build:rtd --stamp
# Some bzl files contain repos only available under bzlmod
build:rtd --enable_bzlmod
-# Disabled due to https://github.com/bazelbuild/bazel/issues/20942
-build --lockfile_mode=off
+build --lockfile_mode=update
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 91549a712b..11728a1ef2 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -21,6 +21,12 @@ A brief description of the categories of changes:
### Changed
+* (bzlmod): The `MODULE.bazel.lock` `whl_library` rule attributes are now
+  sorted in the attributes section. We also omit attributes that are set to
+  their default values in order to reduce the size of the lock file.
+* (deps): Bumped bazel_features to 1.9.1 to detect optional support for
+  non-blocking downloads.
+
### Fixed
* (whl_library): Fix the experimental_target_platforms overriding for platform
@@ -42,12 +48,15 @@ A brief description of the categories of changes:
* (gazelle) Added a new `python_default_visibility` directive to control the
_default_ visibility of generated targets. See the [docs][python_default_visibility]
for details.
-
* (wheel) Add support for `data_files` attributes in py_wheel rule
([#1777](https://github.com/bazelbuild/rules_python/issues/1777))
-
* (py_wheel) `bzlmod` installations now provide a `twine` setup for the default
Python toolchain in `rules_python` for version 3.11.
+* (bzlmod) New `experimental_index_url`, `experimental_extra_index_urls` and
+ `experimental_index_url_overrides` to `pip.parse` for using the bazel
+ downloader. If you see any issues, report in
+ [#1357](https://github.com/bazelbuild/rules_python/issues/1357). The URLs for
+ the whl and sdist files will be written to the lock file.
[0.XX.0]: https://github.com/bazelbuild/rules_python/releases/tag/0.XX.0
[python_default_visibility]: gazelle/README.md#directive-python_default_visibility
diff --git a/MODULE.bazel b/MODULE.bazel
index a165a94bcd..fc32a3e51f 100644
--- a/MODULE.bazel
+++ b/MODULE.bazel
@@ -4,7 +4,7 @@ module(
compatibility_level = 1,
)
-bazel_dep(name = "bazel_features", version = "1.1.1")
+bazel_dep(name = "bazel_features", version = "1.9.1")
bazel_dep(name = "bazel_skylib", version = "1.3.0")
bazel_dep(name = "platforms", version = "0.0.4")
@@ -58,6 +58,7 @@ register_toolchains("@pythons_hub//:all")
pip = use_extension("//python/extensions:pip.bzl", "pip")
pip.parse(
+ experimental_index_url = "https://pypi.org/simple",
hub_name = "rules_python_publish_deps",
python_version = "3.11",
requirements_darwin = "//tools/publish:requirements_darwin.txt",
@@ -69,7 +70,7 @@ use_repo(pip, "rules_python_publish_deps")
# ===== DEV ONLY DEPS AND SETUP BELOW HERE =====
bazel_dep(name = "stardoc", version = "0.6.2", dev_dependency = True, repo_name = "io_bazel_stardoc")
bazel_dep(name = "rules_bazel_integration_test", version = "0.20.0", dev_dependency = True)
-bazel_dep(name = "rules_testing", version = "0.5.0", dev_dependency = True)
+bazel_dep(name = "rules_testing", version = "0.6.0", dev_dependency = True)
bazel_dep(name = "rules_cc", version = "0.0.9", dev_dependency = True)
# Extra gazelle plugin deps so that WORKSPACE.bzlmod can continue including it for e2e tests.
@@ -83,6 +84,8 @@ dev_pip = use_extension(
dev_dependency = True,
)
dev_pip.parse(
+ envsubst = ["PIP_INDEX_URL"],
+ experimental_index_url = "${PIP_INDEX_URL:-https://pypi.org/simple}",
experimental_requirement_cycles = {
"sphinx": [
"sphinx",
@@ -98,6 +101,8 @@ dev_pip.parse(
requirements_lock = "//docs/sphinx:requirements.txt",
)
dev_pip.parse(
+ envsubst = ["PIP_INDEX_URL"],
+ experimental_index_url = "${PIP_INDEX_URL:-https://pypi.org/simple}",
hub_name = "pypiserver",
python_version = "3.11",
requirements_lock = "//examples/wheel:requirements_server.txt",
diff --git a/examples/bzlmod/MODULE.bazel b/examples/bzlmod/MODULE.bazel
index ceb0010bd4..1134487145 100644
--- a/examples/bzlmod/MODULE.bazel
+++ b/examples/bzlmod/MODULE.bazel
@@ -94,6 +94,20 @@ use_repo(pip, "whl_mods_hub")
# Alternatively, `python_interpreter_target` can be used to directly specify
# the Python interpreter to run to resolve dependencies.
pip.parse(
+ # We can use `envsubst` in the value below.
+ envsubst = ["PIP_INDEX_URL"],
+ # Use the bazel downloader to query the simple API for downloading the sources
+ # Note, that we can use envsubst for this value.
+ experimental_index_url = "${PIP_INDEX_URL:-https://pypi.org/simple}",
+ # One can also select a particular index for a particular package.
+ # This ensures that the setup is resistant against confusion attacks.
+ # experimental_index_url_overrides = {
+ # "my_package": "https://different-index-url.com",
+ # },
+ # Or you can specify extra indexes like with `pip`:
+ # experimental_extra_index_urls = [
+ # "https://different-index-url.com",
+ # ],
experimental_requirement_cycles = {
"sphinx": [
"sphinx",
diff --git a/internal_deps.bzl b/internal_deps.bzl
index 9931933396..2ef0dc5751 100644
--- a/internal_deps.bzl
+++ b/internal_deps.bzl
@@ -57,18 +57,9 @@ def rules_python_internal_deps():
http_archive(
name = "rules_testing",
- sha256 = "b84ed8546f1969d700ead4546de9f7637e0f058d835e47e865dcbb13c4210aed",
- strip_prefix = "rules_testing-0.5.0",
- url = "https://github.com/bazelbuild/rules_testing/releases/download/v0.5.0/rules_testing-v0.5.0.tar.gz",
- )
-
- http_archive(
- name = "rules_license",
- urls = [
- "https://mirror.bazel.build/github.com/bazelbuild/rules_license/releases/download/0.0.7/rules_license-0.0.7.tar.gz",
- "https://github.com/bazelbuild/rules_license/releases/download/0.0.7/rules_license-0.0.7.tar.gz",
- ],
- sha256 = "4531deccb913639c30e5c7512a054d5d875698daeb75d8cf90f284375fe7c360",
+ sha256 = "02c62574631876a4e3b02a1820cb51167bb9cdcdea2381b2fa9d9b8b11c407c4",
+ strip_prefix = "rules_testing-0.6.0",
+ url = "https://github.com/bazelbuild/rules_testing/releases/download/v0.6.0/rules_testing-v0.6.0.tar.gz",
)
http_archive(
@@ -221,3 +212,10 @@ def rules_python_internal_deps():
],
sha256 = "4531deccb913639c30e5c7512a054d5d875698daeb75d8cf90f284375fe7c360",
)
+
+ http_archive(
+ name = "bazel_features",
+ sha256 = "d7787da289a7fb497352211ad200ec9f698822a9e0757a4976fd9f713ff372b3",
+ strip_prefix = "bazel_features-1.9.1",
+ url = "https://github.com/bazel-contrib/bazel_features/releases/download/v1.9.1/bazel_features-v1.9.1.tar.gz",
+ )
diff --git a/internal_setup.bzl b/internal_setup.bzl
index a80099f5f5..bb62611213 100644
--- a/internal_setup.bzl
+++ b/internal_setup.bzl
@@ -14,6 +14,7 @@
"""Setup for rules_python tests and tools."""
+load("@bazel_features//:deps.bzl", "bazel_features_deps")
load("@bazel_skylib//:workspace.bzl", "bazel_skylib_workspace")
load("@cgrindel_bazel_starlib//:deps.bzl", "bazel_starlib_dependencies")
load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")
@@ -42,3 +43,4 @@ def rules_python_internal_setup():
bazel_integration_test_rules_dependencies()
bazel_starlib_dependencies()
bazel_binaries(versions = SUPPORTED_BAZEL_VERSIONS)
+ bazel_features_deps()
diff --git a/python/pip_install/pip_repository.bzl b/python/pip_install/pip_repository.bzl
index 3d5f3c1eb8..55d61fcea0 100644
--- a/python/pip_install/pip_repository.bzl
+++ b/python/pip_install/pip_repository.bzl
@@ -22,6 +22,7 @@ load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse
load("//python/pip_install/private:generate_group_library_build_bazel.bzl", "generate_group_library_build_bazel")
load("//python/pip_install/private:generate_whl_library_build_bazel.bzl", "generate_whl_library_build_bazel")
load("//python/pip_install/private:srcs.bzl", "PIP_INSTALL_PY_SRCS")
+load("//python/private:auth.bzl", "AUTH_ATTRS", "get_auth")
load("//python/private:envsubst.bzl", "envsubst")
load("//python/private:normalize_name.bzl", "normalize_name")
load("//python/private:parse_whl_name.bzl", "parse_whl_name")
@@ -187,7 +188,7 @@ def use_isolated(ctx, attr):
return use_isolated
-def _parse_optional_attrs(rctx, args):
+def _parse_optional_attrs(rctx, args, extra_pip_args = None):
"""Helper function to parse common attributes of pip_repository and whl_library repository rules.
This function also serializes the structured arguments as JSON
@@ -196,6 +197,7 @@ def _parse_optional_attrs(rctx, args):
Args:
rctx: Handle to the rule repository context.
args: A list of parsed args for the rule.
+ extra_pip_args: The pip args to pass.
Returns: Augmented args list.
"""
@@ -212,7 +214,7 @@ def _parse_optional_attrs(rctx, args):
# Check for None so we use empty default types from our attrs.
# Some args want to be list, and some want to be dict.
- if rctx.attr.extra_pip_args != None:
+ if extra_pip_args != None:
args += [
"--extra_pip_args",
json.encode(struct(arg = [
@@ -759,24 +761,64 @@ def _whl_library_impl(rctx):
"--requirement",
rctx.attr.requirement,
]
-
- args = _parse_optional_attrs(rctx, args)
+ extra_pip_args = []
+ extra_pip_args.extend(rctx.attr.extra_pip_args)
# Manually construct the PYTHONPATH since we cannot use the toolchain here
environment = _create_repository_execution_environment(rctx, python_interpreter)
- repo_utils.execute_checked(
- rctx,
- op = "whl_library.ResolveRequirement({}, {})".format(rctx.attr.name, rctx.attr.requirement),
- arguments = args,
- environment = environment,
- quiet = rctx.attr.quiet,
- timeout = rctx.attr.timeout,
- )
+ whl_path = None
+ if rctx.attr.whl_file:
+ whl_path = rctx.path(rctx.attr.whl_file)
+
+ # Simulate the behaviour where the whl is present in the current directory.
+ rctx.symlink(whl_path, whl_path.basename)
+ whl_path = rctx.path(whl_path.basename)
+ elif rctx.attr.urls:
+ filename = rctx.attr.filename
+ urls = rctx.attr.urls
+ if not filename:
+ _, _, filename = urls[0].rpartition("/")
+
+ if not (filename.endswith(".whl") or filename.endswith("tar.gz") or filename.endswith(".zip")):
+ if rctx.attr.filename:
+ msg = "got '{}'".format(filename)
+ else:
+ msg = "detected '{}' from url:\n{}".format(filename, urls[0])
+ fail("Only '.whl', '.tar.gz' or '.zip' files are supported, {}".format(msg))
+
+ result = rctx.download(
+ url = urls,
+ output = filename,
+ sha256 = rctx.attr.sha256,
+ auth = get_auth(rctx, urls),
+ )
+
+ if not result.success:
+ fail("could not download the '{}' from {}:\n{}".format(filename, urls, result))
+
+ if filename.endswith(".whl"):
+ whl_path = rctx.path(rctx.attr.filename)
+ else:
+ # It is an sdist and we need to tell PyPI to use a file in this directory
+ # and not use any indexes.
+ extra_pip_args.extend(["--no-index", "--find-links", "."])
+
+ args = _parse_optional_attrs(rctx, args, extra_pip_args)
- whl_path = rctx.path(json.decode(rctx.read("whl_file.json"))["whl_file"])
- if not rctx.delete("whl_file.json"):
- fail("failed to delete the whl_file.json file")
+ if not whl_path:
+ repo_utils.execute_checked(
+ rctx,
+ op = "whl_library.ResolveRequirement({}, {})".format(rctx.attr.name, rctx.attr.requirement),
+ arguments = args,
+ environment = environment,
+ quiet = rctx.attr.quiet,
+ timeout = rctx.attr.timeout,
+ )
+
+ whl_path = rctx.path(json.decode(rctx.read("whl_file.json"))["whl_file"])
+ if not rctx.delete("whl_file.json"):
+ fail("failed to delete the whl_file.json file")
if rctx.attr.whl_patches:
patches = {}
@@ -890,7 +932,8 @@ if __name__ == "__main__":
)
return contents
-whl_library_attrs = {
+# NOTE @aignas 2024-03-21: The usage of dict({}, **common) ensures that all args to `dict` are unique
+whl_library_attrs = dict({
"annotation": attr.label(
doc = (
"Optional json encoded file containing annotation to apply to the extracted wheel. " +
@@ -898,6 +941,9 @@ whl_library_attrs = {
),
allow_files = True,
),
+ "filename": attr.string(
+ doc = "Download the whl file to this filename. Only used when the `urls` is passed. If not specified, will be auto-detected from the `urls`.",
+ ),
"group_deps": attr.string_list(
doc = "List of dependencies to skip in order to break the cycles within a dependency group.",
default = [],
@@ -911,7 +957,18 @@ whl_library_attrs = {
),
"requirement": attr.string(
mandatory = True,
- doc = "Python requirement string describing the package to make available",
+ doc = "Python requirement string describing the package to make available, if 'urls' or 'whl_file' is given, then this only needs to include foo[any_extras] as a bare minimum.",
+ ),
+ "sha256": attr.string(
+ doc = "The sha256 of the downloaded whl. Only used when the `urls` is passed.",
+ ),
+ "urls": attr.string_list(
+ doc = """\
+The list of urls of the whl to be downloaded using bazel downloader. Using this
+attr causes `extra_pip_args` and `download_only` to be ignored.""",
+ ),
+ "whl_file": attr.label(
+ doc = "The whl file that should be used instead of downloading or building the whl.",
),
"whl_patches": attr.label_keyed_string_dict(
doc = """a label-keyed-string dict that has
@@ -933,9 +990,8 @@ whl_library_attrs = {
for repo in all_requirements
],
),
-}
-
-whl_library_attrs.update(**common_attrs)
+}, **common_attrs)
+whl_library_attrs.update(AUTH_ATTRS)
whl_library = repository_rule(
attrs = whl_library_attrs,
diff --git a/python/private/BUILD.bazel b/python/private/BUILD.bazel
index d3d6e76a35..b105c470aa 100644
--- a/python/private/BUILD.bazel
+++ b/python/private/BUILD.bazel
@@ -119,6 +119,18 @@ bzl_library(
srcs = ["parse_whl_name.bzl"],
)
+bzl_library(
+ name = "pypi_index_bzl",
+ srcs = ["pypi_index.bzl"],
+ deps = [
+ ":auth_bzl",
+ ":normalize_name_bzl",
+ ":text_util_bzl",
+ "//python/pip_install:requirements_parser_bzl",
+ "//python/private/bzlmod:bazel_features_bzl",
+ ],
+)
+
bzl_library(
name = "py_cc_toolchain_bzl",
srcs = [
@@ -260,6 +272,9 @@ bzl_library(
name = "whl_target_platforms_bzl",
srcs = ["whl_target_platforms.bzl"],
visibility = ["//:__subpackages__"],
+ deps = [
+ "parse_whl_name_bzl",
+ ],
)
bzl_library(
diff --git a/python/private/auth.bzl b/python/private/auth.bzl
index 39ada37cae..6b612678c8 100644
--- a/python/private/auth.bzl
+++ b/python/private/auth.bzl
@@ -17,26 +17,90 @@
The implementation below is copied directly from Bazel's implementation of `http_archive`.
Accordingly, the return value of this function should be used identically as the `auth` parameter of `http_archive`.
Reference: https://github.com/bazelbuild/bazel/blob/6.3.2/tools/build_defs/repo/http.bzl#L109
+
+The helpers were further modified to support module_ctx.
"""
-# TODO @aignas 2023-12-18: use the following instead when available.
-# load("@bazel_tools//tools/build_defs/repo:utils.bzl", "get_auth")
load("@bazel_tools//tools/build_defs/repo:utils.bzl", "read_netrc", "read_user_netrc", "use_netrc")
-def get_auth(rctx, urls):
+# Copied from https://sourcegraph.com/github.com/bazelbuild/bazel@26c6add3f9809611ad3795bce1e5c0fb37902902/-/blob/tools/build_defs/repo/http.bzl
+_AUTH_PATTERN_DOC = """An optional dict mapping host names to custom authorization patterns.
+
+If a URL's host name is present in this dict the value will be used as a pattern when
+generating the authorization header for the http request. This enables the use of custom
+authorization schemes used in a lot of common cloud storage providers.
+
+The pattern currently supports 2 tokens: <login> and
+<password>, which are replaced with their equivalent value
+in the netrc file for the same host name. After formatting, the result is set
+as the value for the Authorization field of the HTTP request.
+
+Example attribute and netrc for a http download to an oauth2 enabled API using a bearer token:
+
+
+auth_patterns = {
+ "storage.cloudprovider.com": "Bearer <password>"
+}
+
+
+netrc:
+
+machine storage.cloudprovider.com
+        password RANDOM-TOKEN
+
+
+The final HTTP request would have the following header:
+
+
+Authorization: Bearer RANDOM-TOKEN ++""" + +# AUTH_ATTRS are used within whl_library and pip bzlmod extension. +AUTH_ATTRS = { + "auth_patterns": attr.string_dict( + doc = _AUTH_PATTERN_DOC, + ), + "netrc": attr.string( + doc = "Location of the .netrc file to use for authentication", + ), +} + +def get_auth(ctx, urls, ctx_attr = None): """Utility for retrieving netrc-based authentication parameters for repository download rules used in python_repository. Args: - rctx (repository_ctx): The repository rule's context object. + ctx(repository_ctx or module_ctx): The extension module_ctx or + repository rule's repository_ctx object. urls: A list of URLs from which assets will be downloaded. + ctx_attr(struct): The attributes to get the netrc from. When ctx is + repository_ctx, then we will attempt to use repository_ctx.attr + if this is not specified, otherwise we will use the specified + field. The module_ctx attributes are located in the tag classes + so it cannot be retrieved from the context. Returns: dict: A map of authentication parameters by URL. """ - if rctx.attr.netrc: - netrc = read_netrc(rctx, rctx.attr.netrc) - elif "NETRC" in rctx.os.environ: - netrc = read_netrc(rctx, rctx.os.environ["NETRC"]) + + # module_ctx does not have attributes, as they are stored in tag classes. 
Whilst + # the correct behaviour should be to pass the `attr` to the + ctx_attr = ctx_attr or getattr(ctx, "attr", None) + ctx_attr = struct( + netrc = getattr(ctx_attr, "netrc", None), + auth_patterns = getattr(ctx_attr, "auth_patterns", ""), + ) + + if ctx_attr.netrc: + netrc = read_netrc(ctx, ctx_attr.netrc) + elif "NETRC" in ctx.os.environ: + # This can be used on newer bazel versions + if hasattr(ctx, "getenv"): + netrc = read_netrc(ctx, ctx.getenv("NETRC")) + else: + netrc = read_netrc(ctx, ctx.os.environ["NETRC"]) else: - netrc = read_user_netrc(rctx) - return use_netrc(netrc, urls, rctx.attr.auth_patterns) + netrc = read_user_netrc(ctx) + + return use_netrc(netrc, urls, ctx_attr.auth_patterns) diff --git a/python/private/bzlmod/BUILD.bazel b/python/private/bzlmod/BUILD.bazel index b636cca1a2..0ec95e4bed 100644 --- a/python/private/bzlmod/BUILD.bazel +++ b/python/private/bzlmod/BUILD.bazel @@ -32,6 +32,7 @@ bzl_library( ":pip_repository_bzl", "//python/pip_install:pip_repository_bzl", "//python/pip_install:requirements_parser_bzl", + "//python/private:pypi_index_bzl", "//python/private:full_version_bzl", "//python/private:normalize_name_bzl", "//python/private:parse_whl_name_bzl", @@ -44,7 +45,7 @@ bzl_library( bzl_library( name = "bazel_features_bzl", - srcs = ["@bazel_features//:bzl_files"] if BZLMOD_ENABLED else [], + srcs = ["@bazel_features//:bzl_files"], ) bzl_library( diff --git a/python/private/bzlmod/pip.bzl b/python/private/bzlmod/pip.bzl index 13d1fa3842..e87679d794 100644 --- a/python/private/bzlmod/pip.bzl +++ b/python/private/bzlmod/pip.bzl @@ -25,10 +25,13 @@ load( "whl_library", ) load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") +load("//python/private:auth.bzl", "AUTH_ATTRS") load("//python/private:normalize_name.bzl", "normalize_name") load("//python/private:parse_whl_name.bzl", "parse_whl_name") +load("//python/private:pypi_index.bzl", "get_simpleapi_sources", "simpleapi_download") 
load("//python/private:render_pkg_aliases.bzl", "whl_alias") load("//python/private:version_label.bzl", "version_label") +load("//python/private:whl_target_platforms.bzl", "select_whl") load(":pip_repository.bzl", "pip_repository") def _parse_version(version): @@ -98,7 +101,7 @@ You cannot use both the additive_build_content and additive_build_content_file a whl_mods = whl_mods, ) -def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): +def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides, simpleapi_cache): python_interpreter_target = pip_attr.python_interpreter_target # if we do not have the python_interpreter set in the attributes @@ -123,11 +126,12 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): hub_name, version_label(pip_attr.python_version), ) - requrements_lock = locked_requirements_label(module_ctx, pip_attr) + + requirements_lock = locked_requirements_label(module_ctx, pip_attr) # Parse the requirements file directly in starlark to get the information # needed for the whl_libary declarations below. 
- requirements_lock_content = module_ctx.read(requrements_lock) + requirements_lock_content = module_ctx.read(requirements_lock) parse_result = parse_requirements(requirements_lock_content) # Replicate a surprising behavior that WORKSPACE builds allowed: @@ -174,6 +178,28 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): whl_group_mapping = {} requirement_cycles = {} + index_urls = {} + if pip_attr.experimental_index_url: + if pip_attr.download_only: + fail("Currently unsupported to use `download_only` and `experimental_index_url`") + + index_urls = simpleapi_download( + module_ctx, + attr = struct( + index_url = pip_attr.experimental_index_url, + extra_index_urls = pip_attr.experimental_extra_index_urls or [], + index_url_overrides = pip_attr.experimental_index_url_overrides or {}, + sources = [requirements_lock_content], + envsubst = pip_attr.envsubst, + # Auth related info + netrc = pip_attr.netrc, + auth_patterns = pip_attr.auth_patterns, + ), + cache = simpleapi_cache, + ) + + major_minor = _major_minor_version(pip_attr.python_version) + # Create a new wheel library for each of the different whls for whl_name, requirement_line in requirements: # We are not using the "sanitized name" because the user @@ -185,34 +211,98 @@ def _create_whl_repos(module_ctx, pip_attr, whl_map, whl_overrides): group_name = whl_group_mapping.get(whl_name) group_deps = requirement_cycles.get(group_name, []) + # Construct args separately so that the lock file can be smaller and does not include unused + # attrs. 
repo_name = "{}_{}".format(pip_name, whl_name) - whl_library( - name = repo_name, - requirement = requirement_line, + whl_library_args = dict( repo = pip_name, repo_prefix = pip_name + "_", + requirement = requirement_line, + ) + maybe_args = dict( + # The following values are safe to omit if they have false like values annotation = annotation, - whl_patches = { - p: json.encode(args) - for p, args in whl_overrides.get(whl_name, {}).items() - }, - experimental_target_platforms = pip_attr.experimental_target_platforms, - python_interpreter = pip_attr.python_interpreter, - python_interpreter_target = python_interpreter_target, - quiet = pip_attr.quiet, - timeout = pip_attr.timeout, - isolated = use_isolated(module_ctx, pip_attr), - extra_pip_args = extra_pip_args, download_only = pip_attr.download_only, - pip_data_exclude = pip_attr.pip_data_exclude, enable_implicit_namespace_pkgs = pip_attr.enable_implicit_namespace_pkgs, environment = pip_attr.environment, envsubst = pip_attr.envsubst, - group_name = group_name, + experimental_target_platforms = pip_attr.experimental_target_platforms, + extra_pip_args = extra_pip_args, group_deps = group_deps, + group_name = group_name, + pip_data_exclude = pip_attr.pip_data_exclude, + python_interpreter = pip_attr.python_interpreter, + python_interpreter_target = python_interpreter_target, + whl_patches = { + p: json.encode(args) + for p, args in whl_overrides.get(whl_name, {}).items() + }, + ) + whl_library_args.update({k: v for k, v in maybe_args.items() if v}) + maybe_args_with_default = dict( + # The following values have defaults next to them + isolated = (use_isolated(module_ctx, pip_attr), True), + quiet = (pip_attr.quiet, True), + timeout = (pip_attr.timeout, 600), ) + whl_library_args.update({k: v for k, (v, default) in maybe_args_with_default.items() if v == default}) + + if index_urls: + srcs = get_simpleapi_sources(requirement_line) + + whls = [] + sdist = None + for sha256 in srcs.shas: + # For now if the artifact is 
marked as yanked we just ignore it. + # + # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api + + maybe_whl = index_urls[whl_name].whls.get(sha256) + if maybe_whl and not maybe_whl.yanked: + whls.append(maybe_whl) + continue + + maybe_sdist = index_urls[whl_name].sdists.get(sha256) + if maybe_sdist and not maybe_sdist.yanked: + sdist = maybe_sdist + continue + + print("WARNING: Could not find a whl or an sdist with sha256={}".format(sha256)) # buildifier: disable=print + + distribution = select_whl( + whls = whls, + want_abis = [ + "none", + "abi3", + "cp" + major_minor.replace(".", ""), + # Older python versions have wheels for the `*m` ABI. + "cp" + major_minor.replace(".", "") + "m", + ], + want_os = module_ctx.os.name, + want_cpu = module_ctx.os.arch, + ) or sdist + + if distribution: + whl_library_args["requirement"] = srcs.requirement + whl_library_args["urls"] = [distribution.url] + whl_library_args["sha256"] = distribution.sha256 + whl_library_args["filename"] = distribution.filename + if pip_attr.netrc: + whl_library_args["netrc"] = pip_attr.netrc + if pip_attr.auth_patterns: + whl_library_args["auth_patterns"] = pip_attr.auth_patterns + + # pip is not used to download wheels and the python `whl_library` helpers are only extracting things + whl_library_args.pop("extra_pip_args", None) + + # This is no-op because pip is not used to download the wheel. + whl_library_args.pop("download_only", None) + else: + print("WARNING: falling back to pip for installing the right file for {}".format(requirement_line)) # buildifier: disable=print - major_minor = _major_minor_version(pip_attr.python_version) + # We sort so that the lock-file remains the same no matter the order of how the + # args are manipulated in the code going before. 
+ whl_library(name = repo_name, **dict(sorted(whl_library_args.items()))) whl_map[hub_name].setdefault(whl_name, []).append( whl_alias( repo = repo_name, @@ -329,6 +419,8 @@ def _pip_impl(module_ctx): # Where hub, whl, and pip are the repo names hub_whl_map = {} + simpleapi_cache = {} + for mod in module_ctx.modules: for pip_attr in mod.tags.parse: hub_name = pip_attr.hub_name @@ -364,7 +456,7 @@ def _pip_impl(module_ctx): else: pip_hub_map[pip_attr.hub_name].python_versions.append(pip_attr.python_version) - _create_whl_repos(module_ctx, pip_attr, hub_whl_map, whl_overrides) + _create_whl_repos(module_ctx, pip_attr, hub_whl_map, whl_overrides, simpleapi_cache) for hub_name, whl_map in hub_whl_map.items(): pip_repository( @@ -379,6 +471,49 @@ def _pip_impl(module_ctx): def _pip_parse_ext_attrs(): attrs = dict({ + "experimental_extra_index_urls": attr.string_list( + doc = """\ +The extra index URLs to use for downloading wheels using bazel downloader. +Each value is going to be subject to `envsubst` substitutions if necessary. + +The indexes must support Simple API as described here: +https://packaging.python.org/en/latest/specifications/simple-repository-api/ + +This is equivalent to `--extra-index-urls` `pip` option. +""", + default = [], + ), + "experimental_index_url": attr.string( + doc = """\ +The index URL to use for downloading wheels using bazel downloader. This value is going +to be subject to `envsubst` substitutions if necessary. + +The indexes must support Simple API as described here: +https://packaging.python.org/en/latest/specifications/simple-repository-api/ + +In the future this could be defaulted to `https://pypi.org` when this feature becomes +stable. + +This is equivalent to `--index-url` `pip` option. +""", + ), + "experimental_index_url_overrides": attr.string_dict( + doc = """\ +The index URL overrides for each package to use for downloading wheels using +bazel downloader. 
This value is going to be subject to `envsubst` substitutions +if necessary. + +The key is the package name (will be normalized before usage) and the value is the +index URL. + +This design pattern has been chosen in order to be fully deterministic about which +packages come from which source. We want to avoid issues similar to what happened in +https://pytorch.org/blog/compromised-nightly-dependency/. + +The indexes must support Simple API as described here: +https://packaging.python.org/en/latest/specifications/simple-repository-api/ +""", + ), "hub_name": attr.string( mandatory = True, doc = """ @@ -422,6 +557,7 @@ The labels are JSON config files describing the modifications. """, ), }, **pip_repository_attrs) + attrs.update(AUTH_ATTRS) # Like the pip_repository rule, we end up setting this manually so # don't allow users to override it. diff --git a/python/private/pypi_index.bzl b/python/private/pypi_index.bzl new file mode 100644 index 0000000000..e716831d5a --- /dev/null +++ b/python/private/pypi_index.bzl @@ -0,0 +1,358 @@ +# Copyright 2024 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +A file that houses private functions used in the `bzlmod` extension with the same name. 
+""" + +load("@bazel_features//:features.bzl", "bazel_features") +load("@bazel_skylib//lib:sets.bzl", "sets") +load("//python/pip_install:requirements_parser.bzl", parse_requirements = "parse") +load(":auth.bzl", "get_auth") +load(":envsubst.bzl", "envsubst") +load(":normalize_name.bzl", "normalize_name") + +def simpleapi_download(ctx, *, attr, cache): + """Download Simple API HTML. + + Args: + ctx: The module_ctx or repository_ctx. + attr: Contains the parameters for the download. They are grouped into a + struct for better clarity. It must have attributes: + * index_url: str, the index. + * index_url_overrides: dict[str, str], the index overrides for + separate packages. + * extra_index_urls: Extra index URLs that will be looked up after + the main is looked up. + * sources: list[str], the sources to download things for. Each value is + the contents of requirements files. + * envsubst: list[str], the envsubst vars for performing substitution in index url. + * netrc: The netrc parameter for ctx.download, see http_file for docs. + * auth_patterns: The auth_patterns parameter for ctx.download, see + http_file for docs. + cache: A dictionary that can be used as a cache between calls during a + single evaluation of the extension. We use a dictionary as a cache + so that we can reuse calls to the simple API when evaluating the + extension. Using the canonical_id parameter of the module_ctx would + deposit the simple API responses to the bazel cache and that is + undesirable because additions to the PyPI index would not be + reflected when re-evaluating the extension unless we do + `bazel clean --expunge`. + + Returns: + dict of pkg name to the parsed HTML contents - a list of structs. + """ + index_url_overrides = { + normalize_name(p): i + for p, i in (attr.index_url_overrides or {}).items() + } + + download_kwargs = {} + if bazel_features.external_deps.download_has_block_param: + download_kwargs["block"] = False + + # Download in parallel if possible. 
This will download (potentially + # duplicate) data for multiple packages if there is more than one index + # available, but that is the price of convenience. However, that price + # should be mostly negligible because the simple API calls are very cheap + # and the user should not notice any extra overhead. + # + # If we are in synchronous mode, then we will use the first result that we + # find. + # + # NOTE @aignas 2024-03-31: we are not merging results from multiple indexes + # to replicate how `pip` would handle this case. + async_downloads = {} + contents = {} + index_urls = [attr.index_url] + attr.extra_index_urls + for pkg in get_packages_from_requirements(attr.sources): + pkg_normalized = normalize_name(pkg) + + success = False + for index_url in index_urls: + result = read_simple_api( + ctx = ctx, + url = "{}/{}/".format( + index_url_overrides.get(pkg_normalized, index_url).rstrip("/"), + pkg, + ), + attr = attr, + cache = cache, + **download_kwargs + ) + if hasattr(result, "wait"): + # We will process it in a separate loop: + async_downloads.setdefault(pkg_normalized, []).append( + struct( + pkg_normalized = pkg_normalized, + wait = result.wait, + ), + ) + continue + + if result.success: + contents[pkg_normalized] = result.output + success = True + break + + if not async_downloads and not success: + fail("Failed to download metadata from urls: {}".format( + ", ".join(index_urls), + )) + + if not async_downloads: + return contents + + # If we use `block` == False, then we need to have a second loop that is + # collecting all of the results as they were being downloaded in parallel. 
+ for pkg, downloads in async_downloads.items(): + success = False + for download in downloads: + result = download.wait() + + if result.success and download.pkg_normalized not in contents: + contents[download.pkg_normalized] = result.output + success = True + + if not success: + fail("Failed to download metadata from urls: {}".format( + ", ".join(index_urls), + )) + + return contents + +def read_simple_api(ctx, url, attr, cache, **download_kwargs): + """Read SimpleAPI. + + Args: + ctx: The module_ctx or repository_ctx. + url: str, the url parameter that can be passed to ctx.download. + attr: The attribute that contains necessary info for downloading. The + following attributes must be present: + * envsubst: The envsubst values for performing substitutions in the URL. + * netrc: The netrc parameter for ctx.download, see http_file for docs. + * auth_patterns: The auth_patterns parameter for ctx.download, see + http_file for docs. + cache: A dict for storing the results. + **download_kwargs: Any extra params to ctx.download. + Note that output and auth will be passed for you. + + Returns: + A similar object to what `download` would return except that in result.out + will be the parsed simple api contents. + """ + # NOTE @aignas 2024-03-31: some of the simple APIs use relative URLs for + # the whl location and we cannot handle multiple URLs at once by passing + # them to ctx.download if we want to correctly handle the relative URLs. + # TODO: Add a test that env subbed index urls do not leak into the lock file. 
+ + real_url = envsubst( + url, + attr.envsubst, + ctx.getenv if hasattr(ctx, "getenv") else ctx.os.environ.get, + ) + + cache_key = real_url + if cache_key in cache: + return struct(success = True, output = cache[cache_key]) + + output_str = envsubst( + url, + attr.envsubst, + # Use env names in the subst values - this will be unique over + # the lifetime of the execution of this function and we also use + # `~` as the separator to ensure that we don't get clashes. + {e: "~{}~".format(e) for e in attr.envsubst}.get, + ) + + # Transform the URL into a valid filename + for char in [".", ":", "/", "\\", "-"]: + output_str = output_str.replace(char, "_") + + output = ctx.path(output_str.strip("_").lower() + ".html") + + # NOTE: this may have block = True or block = False in the download_kwargs + download = ctx.download( + url = [real_url], + output = output, + auth = get_auth(ctx, [real_url], ctx_attr = attr), + allow_fail = True, + **download_kwargs + ) + + if download_kwargs.get("block") == False: + # Simulate the same API as ctx.download has + return struct( + wait = lambda: _read_index_result(ctx, download.wait(), output, url, cache, cache_key), + ) + + return _read_index_result(ctx, download, output, url, cache, cache_key) + +def _read_index_result(ctx, result, output, url, cache, cache_key): + if not result.success: + return struct(success = False) + + content = ctx.read(output) + + output = parse_simple_api_html(url = url, content = content) + if output: + cache.setdefault(cache_key, output) + return struct(success = True, output = output, cache_key = cache_key) + else: + return struct(success = False) + +def get_packages_from_requirements(requirements_files): + """Get Simple API sources from a list of requirements files and merge them. + + Args: + requirements_files(list[str]): A list of requirements files contents. + + Returns: + A list. 
+ """ + want_packages = sets.make() + for contents in requirements_files: + parse_result = parse_requirements(contents) + for distribution, _ in parse_result.requirements: + # NOTE: we'll be querying the PyPI servers multiple times if the + # requirements contains non-normalized names, but this is what user + # is specifying to us. + sets.insert(want_packages, distribution) + + return sets.to_list(want_packages) + +def get_simpleapi_sources(line): + """Get PyPI sources from a requirements.txt line. + + We interpret the spec described in + https://pip.pypa.io/en/stable/reference/requirement-specifiers/#requirement-specifiers + + Args: + line(str): The requirements.txt entry. + + Returns: + A struct with shas attribute containing a list of shas to download from pypi_index. + """ + head, _, maybe_hashes = line.partition(";") + _, _, version = head.partition("==") + version = version.partition(" ")[0].strip() + + if "@" in head: + shas = [] + else: + maybe_hashes = maybe_hashes or line + shas = [ + sha.strip() + for sha in maybe_hashes.split("--hash=sha256:")[1:] + ] + + if head == line: + head = line.partition("--hash=")[0].strip() + else: + head = head + ";" + maybe_hashes.partition("--hash=")[0].strip() + + return struct( + requirement = line if not shas else head, + version = version, + shas = sorted(shas), + ) + +def parse_simple_api_html(*, url, content): + """Get the package URLs for given shas by parsing the Simple API HTML. + + Args: + url(str): The URL that the HTML content can be downloaded from. + content(str): The Simple API HTML content. + + Returns: + A list of structs with: + * filename: The filename of the artifact. + * url: The URL to download the artifact. + * sha256: The sha256 of the artifact. + * metadata_sha256: The whl METADATA sha256 if we can download it. If this is + present, then the 'metadata_url' is also present. Defaults to "". + * metadata_url: The URL for the METADATA if we can download it. Defaults to "". 
+ """ + sdists = {} + whls = {} + lines = content.split("= (2, 0): + # We don't expect to have version 2.0 here, but have this check in place just in case. + # https://packaging.python.org/en/latest/specifications/simple-repository-api/#versioning-pypi-s-simple-api + fail("Unsupported API version: {}".format(api_version)) + + for line in lines[1:]: + dist_url, _, tail = line.partition("#sha256=") + sha256, _, tail = tail.partition("\"") + + # See https://packaging.python.org/en/latest/specifications/simple-repository-api/#adding-yank-support-to-the-simple-api + yanked = "data-yanked" in line + + maybe_metadata, _, tail = tail.partition(">") + filename, _, tail = tail.partition("<") + + metadata_sha256 = "" + metadata_url = "" + for metadata_marker in ["data-core-metadata", "data-dist-info-metadata"]: + metadata_marker = metadata_marker + "=\"sha256=" + if metadata_marker in maybe_metadata: + # Implement https://peps.python.org/pep-0714/ + _, _, tail = maybe_metadata.partition(metadata_marker) + metadata_sha256, _, _ = tail.partition("\"") + metadata_url = dist_url + ".metadata" + break + + if filename.endswith(".whl"): + whls[sha256] = struct( + filename = filename, + url = _absolute_url(url, dist_url), + sha256 = sha256, + metadata_sha256 = metadata_sha256, + metadata_url = _absolute_url(url, metadata_url), + yanked = yanked, + ) + else: + sdists[sha256] = struct( + filename = filename, + url = _absolute_url(url, dist_url), + sha256 = sha256, + metadata_sha256 = "", + metadata_url = "", + yanked = yanked, + ) + + return struct( + sdists = sdists, + whls = whls, + ) + +def _absolute_url(index_url, candidate): + if not candidate.startswith(".."): + return candidate + + candidate_parts = candidate.split("..") + last = candidate_parts[-1] + for _ in range(len(candidate_parts) - 1): + index_url, _, _ = index_url.rstrip("/").rpartition("/") + + return "{}/{}".format(index_url, last.strip("/")) diff --git a/python/private/whl_target_platforms.bzl 
b/python/private/whl_target_platforms.bzl index 30e4dd4c7a..4e17f2b4c7 100644 --- a/python/private/whl_target_platforms.bzl +++ b/python/private/whl_target_platforms.bzl @@ -16,6 +16,56 @@ A starlark implementation of the wheel platform tag parsing to get the target platform. """ +load(":parse_whl_name.bzl", "parse_whl_name") + +# Taken from https://peps.python.org/pep-0600/ +_LEGACY_ALIASES = { + "manylinux1_i686": "manylinux_2_5_i686", + "manylinux1_x86_64": "manylinux_2_5_x86_64", + "manylinux2010_i686": "manylinux_2_12_i686", + "manylinux2010_x86_64": "manylinux_2_12_x86_64", + "manylinux2014_aarch64": "manylinux_2_17_aarch64", + "manylinux2014_armv7l": "manylinux_2_17_armv7l", + "manylinux2014_i686": "manylinux_2_17_i686", + "manylinux2014_ppc64": "manylinux_2_17_ppc64", + "manylinux2014_ppc64le": "manylinux_2_17_ppc64le", + "manylinux2014_s390x": "manylinux_2_17_s390x", + "manylinux2014_x86_64": "manylinux_2_17_x86_64", +} + +# _translate_cpu and _translate_os from @platforms//host:extension.bzl +def _translate_cpu(arch): + if arch in ["i386", "i486", "i586", "i686", "i786", "x86"]: + return "x86_32" + if arch in ["amd64", "x86_64", "x64"]: + return "x86_64" + if arch in ["ppc", "ppc64", "ppc64le"]: + return "ppc" + if arch in ["arm", "armv7l"]: + return "arm" + if arch in ["aarch64"]: + return "aarch64" + if arch in ["s390x", "s390"]: + return "s390x" + if arch in ["mips64el", "mips64"]: + return "mips64" + if arch in ["riscv64"]: + return "riscv64" + return None + +def _translate_os(os): + if os.startswith("mac os"): + return "osx" + if os.startswith("freebsd"): + return "freebsd" + if os.startswith("openbsd"): + return "openbsd" + if os.startswith("linux"): + return "linux" + if os.startswith("windows"): + return "windows" + return None + # The order of the dictionaries is to keep definitions with their aliases next to each # other _CPU_ALIASES = { @@ -28,8 +78,11 @@ _CPU_ALIASES = { "aarch64": "aarch64", "arm64": "aarch64", "ppc": "ppc", + "ppc64": "ppc", 
"ppc64le": "ppc", "s390x": "s390x", + "armv6l": "arm", + "armv7l": "arm", } # buildifier: disable=unsorted-dict-items _OS_PREFIXES = { @@ -40,6 +93,131 @@ _OS_PREFIXES = { "win": "windows", } # buildifier: disable=unsorted-dict-items +def _whl_priority(value): + """Return a value for sorting whl lists. + + TODO @aignas 2024-03-29: In the future we should create a repo for each + repo that matches the abi and then we could have config flags for the + preference of `any` wheels or `sdist` or `manylinux` vs `musllinux` or + `universal2`. Ideally we use `select` statements in the hub repo to do + the selection based on the config, but for now this is the best way to + get this working for the host platform. + + In the future the right thing would be to have `bool_flag` or something + similar to be able to have select statements that does the right thing: + * select whls vs sdists. + * select manylinux vs musllinux + * select universal2 vs arch-specific whls + + All of these can be expressed as configuration settings and included in the + select statements in the `whl` repo. This means that the user can configure + for a particular target what they need. + + Returns a 4-tuple where the items are: + * bool - is it an 'any' wheel? True if it is. + * bool - is it an 'universal' wheel? True if it is. (e.g. macos universal2 wheels) + * int - the minor plaform version (e.g. osx os version, libc version) + * int - the major plaform version (e.g. osx os version, libc version) + """ + if "." 
in value: + value, _, _ = value.partition(".") + + if "any" == value: + # This is just a big value that should be larger than any other value returned by this function + return (True, False, 0, 0) + + if "linux" in value: + os, _, tail = value.partition("_") + if os == "linux": + # If the platform tag starts with 'linux', then return something less than what 'any' returns + minor = 0 + major = 0 + else: + major, _, tail = tail.partition("_") # We don't need to use that because it's the same for all candidates now + minor, _, _ = tail.partition("_") + + return (False, os == "linux", int(minor), int(major)) + + if "mac" in value or "osx" in value: + _, _, tail = value.partition("_") + major, _, tail = tail.partition("_") + minor, _, _ = tail.partition("_") + + return (False, "universal2" in value, int(minor), int(major)) + + if not "win" in value: + fail("BUG: only windows, linux and mac platforms are supported, but got: {}".format(value)) + + # Windows does not have multiple wheels for the same target platform + return (False, False, 0, 0) + +def select_whl(*, whls, want_abis, want_os, want_cpu): + """Select a suitable wheel from a list. + + Args: + whls(list[struct]): A list of candidates. + want_abis(list[str]): A list of ABIs that are supported. + want_os(str): The module_ctx.os.name. + want_cpu(str): The module_ctx.os.arch. + + Returns: + None or a struct with `url`, `sha256` and `filename` attributes for the + selected whl. If no match is found, None is returned. + """ + if not whls: + return None + + candidates = {} + for whl in whls: + parsed = parse_whl_name(whl.filename) + if parsed.abi_tag not in want_abis: + # Filter out incompatible ABIs + continue + + platform_tags = list({_LEGACY_ALIASES.get(p, p): True for p in parsed.platform_tag.split(".")}) + + for tag in platform_tags: + candidates[tag] = whl + + # For most packages - if they supply 'any' wheel and there are no other + # compatible wheels with the selected abis, we can just return the value. 
+ if len(candidates) == 1 and "any" in candidates: + return struct( + url = candidates["any"].url, + sha256 = candidates["any"].sha256, + filename = candidates["any"].filename, + ) + + target_plats = {} + has_any = "any" in candidates + for platform_tag, whl in candidates.items(): + if platform_tag == "any": + continue + + if "musl" in platform_tag: + # Ignore musl wheels for now + continue + + platform_tag = ".".join({_LEGACY_ALIASES.get(p, p): True for p in platform_tag.split(".")}) + platforms = whl_target_platforms(platform_tag) + for p in platforms: + target_plats.setdefault("{}_{}".format(p.os, p.cpu), []).append(platform_tag) + + for p, platform_tags in target_plats.items(): + if has_any: + platform_tags.append("any") + + target_plats[p] = sorted(platform_tags, key = _whl_priority) + + want = target_plats.get("{}_{}".format( + _translate_os(want_os), + _translate_cpu(want_cpu), + )) + if not want: + return want + + return candidates[want[0]] + def whl_target_platforms(platform_tag, abi_tag = ""): """Parse the wheel abi and platform tags and return (os, cpu) tuples. 
@@ -74,7 +252,8 @@ def whl_target_platforms(platform_tag, abi_tag = ""): for cpu in cpus ] - fail("unknown platform_tag os: {}".format(platform_tag)) + print("WARNING: ignoring unknown platform_tag os: {}".format(platform_tag)) # buildifier: disable=print + return [] def _cpu_from_tag(tag): candidate = [ @@ -87,7 +266,14 @@ def _cpu_from_tag(tag): if tag == "win32": return ["x86_32"] - elif tag.endswith("universal2") and tag.startswith("macosx"): - return ["x86_64", "aarch64"] - else: - fail("Unrecognized tag: '{}': cannot determine CPU".format(tag)) + elif tag == "win_ia64": + return [] + elif tag.startswith("macosx"): + if tag.endswith("universal2"): + return ["x86_64", "aarch64"] + elif tag.endswith("universal"): + return ["x86_64", "aarch64"] + elif tag.endswith("intel"): + return ["x86_32"] + + return [] diff --git a/tests/private/pypi_index/BUILD.bazel b/tests/private/pypi_index/BUILD.bazel new file mode 100644 index 0000000000..d365896cd3 --- /dev/null +++ b/tests/private/pypi_index/BUILD.bazel @@ -0,0 +1,3 @@ +load(":pypi_index_tests.bzl", "pypi_index_test_suite") + +pypi_index_test_suite(name = "pypi_index_tests") diff --git a/tests/private/pypi_index/pypi_index_tests.bzl b/tests/private/pypi_index/pypi_index_tests.bzl new file mode 100644 index 0000000000..e2122b5eeb --- /dev/null +++ b/tests/private/pypi_index/pypi_index_tests.bzl @@ -0,0 +1,256 @@ +# Copyright 2023 The Bazel Authors. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"" + +load("@rules_testing//lib:test_suite.bzl", "test_suite") +load("@rules_testing//lib:truth.bzl", "subjects") +load("//python/private:pypi_index.bzl", "get_simpleapi_sources", "parse_simple_api_html") # buildifier: disable=bzl-visibility + +_tests = [] + +def _test_no_simple_api_sources(env): + inputs = [ + "foo==0.0.1", + "foo==0.0.1 @ https://someurl.org", + "foo==0.0.1 @ https://someurl.org --hash=sha256:deadbeef", + "foo==0.0.1 @ https://someurl.org; python_version < 2.7 --hash=sha256:deadbeef", + ] + for input in inputs: + got = get_simpleapi_sources(input) + env.expect.that_collection(got.shas).contains_exactly([]) + env.expect.that_str(got.version).equals("0.0.1") + +_tests.append(_test_no_simple_api_sources) + +def _test_simple_api_sources(env): + tests = { + "foo==0.0.2 --hash=sha256:deafbeef --hash=sha256:deadbeef": [ + "deadbeef", + "deafbeef", + ], + "foo[extra]==0.0.2; (python_version < 2.7 or something_else == \"@\") --hash=sha256:deafbeef --hash=sha256:deadbeef": [ + "deadbeef", + "deafbeef", + ], + } + for input, want_shas in tests.items(): + got = get_simpleapi_sources(input) + env.expect.that_collection(got.shas).contains_exactly(want_shas) + env.expect.that_str(got.version).equals("0.0.2") + +_tests.append(_test_simple_api_sources) + +def _generate_html(*items): + return """\ + + + +