From 208ca88f15994b20bcd50e075224af773dfeed8e Mon Sep 17 00:00:00 2001 From: Ignas Anikevicius <240938+aignas@users.noreply.github.com> Date: Fri, 17 Apr 2026 08:17:56 +0900 Subject: [PATCH] fix(pypi): skip index lookups when all package overrides are specified (#3710) When index_url_overrides is provided for all packages, we no longer need to call the index at all. This improves performance and aligns with the expected behavior where overrides should be sufficient. Just to note, currently Pytorch, PyPI, Artifactory have the root index pages, whilst GAR does not. Fixes #3709 --- CHANGELOG.md | 3 +- python/private/pypi/simpleapi_download.bzl | 23 +++++----- .../simpleapi_download_tests.bzl | 45 +++++++++++++++++++ 3 files changed, 59 insertions(+), 12 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index a85317a20e..84b3efea72 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -103,7 +103,8 @@ Other changes: we will from now on fetch the lists of available packages on each index. The used package mappings will be written as facts to the `MODULE.bazel.lock` file on supported bazel versions and it should be done at most once. As a result, - per-package {obj}`experimental_index_url_overrides` is no longer needed . What + per-package {obj}`experimental_index_url_overrides` is no longer needed, but + if specified, it needs to be provided for all packages not on the default index. What is more, the flags for `--index_url` and `--extra-index-url` now behave in the same way as in `uv` or `pip`, i.e. we default to `--index-url` if the package is not found in `--extra-index-url`. Fixes diff --git a/python/private/pypi/simpleapi_download.bzl b/python/private/pypi/simpleapi_download.bzl index 97215d753d..63044bc14a 100644 --- a/python/private/pypi/simpleapi_download.bzl +++ b/python/private/pypi/simpleapi_download.bzl @@ -43,8 +43,7 @@ def simpleapi_download( attr: Contains the parameters for the download. They are grouped into a struct for better clarity. It must have attributes: * index_url: str, the index, or if `extra_index_urls` are passed, the default index. - * index_url_overrides: dict[str, str], the index overrides for - separate packages. + * index_url_overrides: dict[str, str], the index overrides for separate packages. * extra_index_urls: Will be looked at in the order they are defined and the first match wins. This is similar to what uv does, see https://docs.astral.sh/uv/concepts/indexes/#searching-across-multiple-indexes. @@ -132,16 +131,22 @@ def simpleapi_download( return contents def _get_dist_urls(ctx, *, default_index, index_urls, index_url_overrides, sources, read_simpleapi, attr, block, _fail = fail, **kwargs): + if index_url_overrides: + # Let's not call the index at all and just assume that all of the overrides have been + # specified. + return { + pkg: _normalize_url("{}/{}/".format( + index_url_overrides.get(pkg, default_index), + pkg.replace("_", "-"), # Use the official normalization for URLs + )) + for pkg in sources + } + downloads = {} results = {} # Ensure the value is not frozen index_urls = [] + (index_urls or []) - for extra in index_url_overrides.values(): - if extra not in index_urls: - index_urls.append(extra) - - index_urls = index_urls or [] if default_index not in index_urls: index_urls.append(default_index) @@ -174,10 +179,6 @@ def _get_dist_urls(ctx, *, default_index, index_urls, index_url_overrides, sourc # and in the outer function process merging of the results. continue - if index_url_overrides.get(pkg, index_url) != index_url: - # we should not use this index for the package - continue - found = result.output.get(pkg) if not found: continue diff --git a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl index 7d7cbee0ca..405c497508 100644 --- a/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl +++ b/tests/pypi/simpleapi_download/simpleapi_download_tests.bzl @@ -268,6 +268,51 @@ def _test_download_url_parallel(env): _tests.append(_test_download_url_parallel) +def _test_download_url_parallel_with_overrides(env): + downloads = {} + reads = [ + "", + "", + "", + ] + + def download(url, output, **kwargs): + _ = kwargs # buildifier: disable=unused-variable + downloads[url[0]] = output + return struct(wait = lambda: struct(success = True)) + + simpleapi_download( + ctx = struct( + getenv = {}.get, + download = download, + report_progress = lambda _: None, + # We will first add a download to the list, so this is a poor man's `next(foo)` + # implementation. We use 2 because we will enqueue 2 downloads in parallel. + read = lambda i: reads[len(downloads) - 2], + path = lambda i: "path/for/" + i, + ), + attr = struct( + index_url_overrides = { + "bar": "https://example.com/extra/simple/", + }, + index_url = "https://example.com/default/simple/", + extra_index_urls = [], + sources = {"bar": None, "baz": None, "foo": None}, + envsubst = [], + ), + cache = pypi_cache(), + parallel_download = True, + get_auth = lambda ctx, urls, ctx_attr: struct(), + ) + + env.expect.that_dict(downloads).contains_exactly({ + "https://example.com/default/simple/baz/": "path/for/https___example_com_default_simple_baz.html", + "https://example.com/default/simple/foo/": "path/for/https___example_com_default_simple_foo.html", + "https://example.com/extra/simple/bar/": "path/for/https___example_com_extra_simple_bar.html", + }) + +_tests.append(_test_download_url_parallel_with_overrides) + def _test_download_envsubst_url(env): downloads = {} reads = [