From 9e727c4f2b8522a7d04c6e2c0cc974e9aafc6d89 Mon Sep 17 00:00:00 2001 From: Alex Eagle Date: Sun, 16 Jun 2024 13:47:22 -0700 Subject: [PATCH 1/2] feat: py_image_layers Fixes #212 --- WORKSPACE | 26 +++++++++- examples/py_binary/BUILD.bazel | 52 +++++++++++++++++++- examples/py_binary/image_test.yaml | 8 ++++ internal_deps.bzl | 15 ++++++ py/defs.bzl | 3 +- py/private/py_image_layers.bzl | 76 ++++++++++++++++++++++++++++++ py/repositories.bzl | 9 ++-- 7 files changed, 179 insertions(+), 10 deletions(-) create mode 100644 examples/py_binary/image_test.yaml create mode 100644 py/private/py_image_layers.bzl diff --git a/WORKSPACE b/WORKSPACE index d5837741..d46f68dd 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -19,16 +19,24 @@ load("//py:toolchains.bzl", "rules_py_toolchains") rules_py_toolchains() +load("@bazel_features//:deps.bzl", "bazel_features_deps") + +bazel_features_deps() + # Load the Python toolchain for rules_docker register_toolchains("//:container_py_toolchain") +load("@rules_oci//oci:repositories.bzl", "oci_register_toolchains") + +oci_register_toolchains(name = "oci") + load("@rules_python//python:repositories.bzl", "py_repositories", "python_register_toolchains") python_register_toolchains( name = "python_toolchain_3_8", python_version = "3.8.12", # Setting `set_python_version_constraint` will set special constraints on the registered toolchain. - # This means that this toolchain registration will only be selected for `py_binary` / `py_test` targets + # This means that this toolchain registration will only be selected for `py_binary` / `py_test` targets # that have the `python_version = "3.8.12"` attribute set. Targets that have no `python_attribute` will use # the default toolchain resolved which can be seen below. 
set_python_version_constraint = True, @@ -45,10 +53,12 @@ py_repositories() ############################################ # Aspect bazel-lib -load("@aspect_bazel_lib//lib:repositories.bzl", "register_coreutils_toolchains") +load("@aspect_bazel_lib//lib:repositories.bzl", "register_coreutils_toolchains", "register_tar_toolchains") register_coreutils_toolchains() +register_tar_toolchains() + ############################################ ## CC toolchain using llvm load("@toolchains_llvm//toolchain:deps.bzl", "bazel_toolchain_dependencies") @@ -146,6 +156,18 @@ load( _py_image_repos() +load("@rules_oci//oci:pull.bzl", "oci_pull") + +oci_pull( + name = "ubuntu", + digest = "sha256:67211c14fa74f070d27cc59d69a7fa9aeff8e28ea118ef3babc295a0428a6d21", + image = "ubuntu", + platforms = [ + "linux/arm64/v8", + "linux/amd64", + ], +) + ############################################ # rules_rust dependencies for building tools load("@rules_rust//rust:repositories.bzl", "rules_rust_dependencies", "rust_register_toolchains", "rust_repository_set") diff --git a/examples/py_binary/BUILD.bazel b/examples/py_binary/BUILD.bazel index 43e61a7e..cf7e8e6c 100644 --- a/examples/py_binary/BUILD.bazel +++ b/examples/py_binary/BUILD.bazel @@ -1,9 +1,57 @@ -load("//py:defs.bzl", "py_binary") +load("@aspect_bazel_lib//lib:transitions.bzl", "platform_transition_filegroup") +load("//py:defs.bzl", "py_binary", "py_image_layers") +load("@rules_oci//oci:defs.bzl", "oci_image", "oci_tarball") py_binary( - name = "py_binary", + name = "say_hello", srcs = ["say.py"], deps = [ "@pypi_cowsay//:pkg", ], ) + +oci_image( + name = "say_image", + base = "@ubuntu", + entrypoint = ["/examples/py_binary/say_hello"], + tars = py_image_layers("say_image_layers", "say_hello"), +) + +platform( + name = "aarch64_linux", + constraint_values = [ + "@platforms//os:linux", + "@platforms//cpu:aarch64", + ], +) + +platform( + name = "x86_64_linux", + constraint_values = [ + "@platforms//os:linux", + "@platforms//cpu:x86_64", + 
], +) + +platform_transition_filegroup( + name = "platform_image", + srcs = [":say_image"], + target_platform = select({ + "@platforms//cpu:arm64": ":aarch64_linux", + "@platforms//cpu:x86_64": ":x86_64_linux", + }), +) + +container_structure_test( + name = "test", + configs = ["test.yaml"], + image = ":platform_image", +) + +# $ bazel run //examples/py_binary:load +# $ docker run --rm gcr.io/oci_python_hello_world:latest +oci_tarball( + name = "load", + image = ":platform_image", + repo_tags = ["bazel/say:latest"], +) diff --git a/examples/py_binary/image_test.yaml b/examples/py_binary/image_test.yaml new file mode 100644 index 00000000..8a403224 --- /dev/null +++ b/examples/py_binary/image_test.yaml @@ -0,0 +1,8 @@ +# See https://github.com/GoogleContainerTools/container-structure-test#command-tests +schemaVersion: 2.0.0 +metadataTest: + entrypoint: ["/examples/py_binary/say_hello"] +commandTests: + - name: run + command: /examples/py_binary/say_hello + expectedOutput: ["hello py_binary!"] diff --git a/internal_deps.bzl b/internal_deps.bzl index 526557da..f8794c55 100644 --- a/internal_deps.bzl +++ b/internal_deps.bzl @@ -39,6 +39,13 @@ def rules_py_internal_deps(): ], ) + http_archive( + name = "bazel_features", + sha256 = "5d7e4eb0bb17aee392143cd667b67d9044c270a9345776a5e5a3cccbc44aa4b3", + strip_prefix = "bazel_features-1.13.0", + url = "https://github.com/bazel-contrib/bazel_features/releases/download/v1.13.0/bazel_features-v1.13.0.tar.gz", + ) + # Override bazel_skylib distribution to fetch sources instead # so that the gazelle extension is included # see https://github.com/bazelbuild/bazel-skylib/issues/250 @@ -118,3 +125,11 @@ def rules_py_internal_deps(): sha256 = "0523026398aea9c8b5f7a4a6d5c0829c285b4fbd960c17b5967a369342e21e01", downloaded_file_path = "sqlparse-0.4.0-py3-none-any.whl", ) + + # for testing py_image_layers + http_archive( + name = "rules_oci", + sha256 = "768cd23d5fea0235858eecfc8bfaae77a11fe9db9ebb1ac03d31c4b19eb9bc11", + strip_prefix = 
"rules_oci-2.0.0-alpha5", + url = "https://github.com/bazel-contrib/rules_oci/releases/download/v2.0.0-alpha5/rules_oci-v2.0.0-alpha5.tar.gz", + ) diff --git a/py/defs.bzl b/py/defs.bzl index 5883a48c..e3e7e043 100644 --- a/py/defs.bzl +++ b/py/defs.bzl @@ -8,6 +8,7 @@ load("//py/private:py_pytest_main.bzl", _py_pytest_main = "py_pytest_main") load("//py/private:py_unpacked_wheel.bzl", _py_unpacked_wheel = "py_unpacked_wheel") load("//py/private:virtual.bzl", _resolutions = "resolutions") load("//py/private:py_venv.bzl", _py_venv = "py_venv") +load("//py/private:py_image_layers.bzl", _py_image_layers = "py_image_layers") py_pytest_main = _py_pytest_main @@ -16,7 +17,7 @@ py_binary_rule = _py_binary py_test_rule = _py_test py_library_rule = _py_library py_unpacked_wheel_rule = _py_unpacked_wheel - +py_image_layers = _py_image_layers resolutions = _resolutions def _py_binary_or_test(name, rule, srcs, main, imports, deps = [], resolutions = {}, **kwargs): diff --git a/py/private/py_image_layers.bzl b/py/private/py_image_layers.bzl new file mode 100644 index 00000000..398d6a03 --- /dev/null +++ b/py/private/py_image_layers.bzl @@ -0,0 +1,76 @@ +"Helper function to make three separate layers for python applications" + +load("@aspect_bazel_lib//lib:tar.bzl", "mtree_spec", "tar") + +# match *only* external repositories that have the string "python" +# e.g. 
this will match +# `/hello_world/hello_world_bin.runfiles/rules_python~0.21.0~python~python3_9_aarch64-unknown-linux-gnu/bin/python3` +# but not match +# `/hello_world/hello_world_bin.runfiles/_main/python_app` +PY_INTERPRETER_REGEX = "\\.runfiles/.*python.*-.*" + +# match *only* external pip like repositories that contain the string "site-packages" +SITE_PACKAGES_REGEX = "\\.runfiles/.*/site-packages/.*" + +def py_image_layers(name, binary, interpreter_regex = PY_INTERPRETER_REGEX, site_packages_regex = SITE_PACKAGES_REGEX): + """Create three layers for a py_binary target: interpreter, third-party packages, and application code. + + This allows a container image to have smaller uploads, since the application layer usually changes more + than the other two. + + > [!NOTE] + > The middle layer may duplicate other py_image_layers which have a disjoint set of dependencies. + > Follow https://github.com/aspect-build/rules_py/issues/244 + + Args: + name: prefix for generated targets, to ensure they are unique within the package + binary: a py_binary target + interpreter_regex: a regular expression for use by `grep` which extracts the interpreter and related files from the binary runfiles tree + site_packages_regex: a regular expression for use by `grep` which extracts installed packages from the binary runfiles tree + Returns: + a list of labels for the layers, which are tar files + """ + + # Produce layers in this order, as the app changes most often + layers = ["interpreter", "packages", "app"] + + # Produce the manifest for a tar file of our py_binary, but don't tar it up yet, so we can split + # into fine-grained layers for better docker performance. 
+ mtree_spec( + name = name + ".mf", + srcs = [binary], + ) + + native.genrule( + name = name + ".interpreter_tar_manifest", + srcs = [name + ".mf"], + outs = [name + ".interpreter_tar_manifest.spec"], + cmd = "grep '{}' $< >$@".format(interpreter_regex), + ) + + native.genrule( + name = name + ".packages_tar_manifest", + srcs = [name + ".mf"], + outs = [name + ".packages_tar_manifest.spec"], + cmd = "grep '{}' $< >$@".format(site_packages_regex), + ) + + # Any lines that didn't match one of the two grep above + native.genrule( + name = name + ".app_tar_manifest", + srcs = [name + ".mf"], + outs = [name + ".app_tar_manifest.spec"], + cmd = "grep -v '{}' $< | grep -v '{}' >$@".format(site_packages_regex, interpreter_regex), + ) + + result = [] + for layer in layers: + layer_target = "{}.{}_layer".format(name, layer) + result.append(layer_target) + tar( + name = layer_target, + srcs = [binary], + mtree = "{}.{}_tar_manifest".format(name, layer), + ) + + return result diff --git a/py/repositories.bzl b/py/repositories.bzl index 17d321f4..6ea82c00 100644 --- a/py/repositories.bzl +++ b/py/repositories.bzl @@ -10,7 +10,6 @@ load("@bazel_tools//tools/build_defs/repo:utils.bzl", "maybe") def http_archive(name, **kwargs): maybe(_http_archive, name = name, **kwargs) - # WARNING: any changes in this function may be BREAKING CHANGES for users # because we'll fetch a dependency which may be different from one that # they were previously fetching later in their WORKSPACE setup, and now @@ -32,9 +31,9 @@ def rules_py_dependencies(): http_archive( name = "aspect_bazel_lib", - sha256 = "5371d3143307e5222e3c33a575042f93647b4e0a7d6d837f87b6b751102d27ca", - strip_prefix = "bazel-lib-1.40.3", - url = "https://github.com/aspect-build/bazel-lib/archive/refs/tags/v1.40.3.tar.gz", + sha256 = "6d758a8f646ecee7a3e294fbe4386daafbe0e5966723009c290d493f227c390b", + strip_prefix = "bazel-lib-2.7.7", + url = 
"https://github.com/aspect-build/bazel-lib/releases/download/v2.7.7/bazel-lib-v2.7.7.tar.gz", ) http_archive( @@ -42,4 +41,4 @@ def rules_py_dependencies(): sha256 = "c68bdc4fbec25de5b5493b8819cfc877c4ea299c0dcb15c244c5a00208cde311", strip_prefix = "rules_python-0.31.0", url = "https://github.com/bazelbuild/rules_python/releases/download/0.31.0/rules_python-0.31.0.tar.gz", - ) \ No newline at end of file + ) From 93bd3810abf143a1bad8b2f9696273a8cf2cd383 Mon Sep 17 00:00:00 2001 From: Alex Eagle Date: Sun, 16 Jun 2024 13:49:22 -0700 Subject: [PATCH 2/2] chore: docgen --- docs/rules.md | 33 ++++++++++++++++++++++++++++++ examples/py_binary/BUILD.bazel | 6 ------ examples/py_binary/image_test.yaml | 8 -------- py/BUILD.bazel | 3 ++- py/private/BUILD.bazel | 7 +++++++ py/private/py_image_layers.bzl | 1 + 6 files changed, 43 insertions(+), 15 deletions(-) delete mode 100644 examples/py_binary/image_test.yaml diff --git a/docs/rules.md b/docs/rules.md index cb424ebf..aaa68d75 100644 --- a/docs/rules.md +++ b/docs/rules.md @@ -124,6 +124,39 @@ you can `bazel run [name].venv` to produce this, then use it in the editor. | kwargs | additional named parameters to the py_binary_rule. | none | + + +## py_image_layers + +
+py_image_layers(name, binary, interpreter_regex, site_packages_regex)
+
+ +Create three layers for a py_binary target: interpreter, third-party packages, and application code. + +This allows a container image to have smaller uploads, since the application layer usually changes more +than the other two. + +> [!NOTE] +> The middle layer may duplicate other py_image_layers which have a disjoint set of dependencies. +> Follow https://github.com/aspect-build/rules_py/issues/244 + + +**PARAMETERS** + + +| Name | Description | Default Value | +| :------------- | :------------- | :------------- | +| name | prefix for generated targets, to ensure they are unique within the package | none | +| binary | a py_binary target | none | +| interpreter_regex | a regular expression for use by grep which extracts the interpreter and related files from the binary runfiles tree | "\\.runfiles/.*python.*-.*" | +| site_packages_regex | a regular expression for use by grep which extracts installed packages from the binary runfiles tree | "\\.runfiles/.*/site-packages/.*" | + +**RETURNS** + +a list of labels for the layers, which are tar files + + ## py_library diff --git a/examples/py_binary/BUILD.bazel b/examples/py_binary/BUILD.bazel index cf7e8e6c..fd6bddeb 100644 --- a/examples/py_binary/BUILD.bazel +++ b/examples/py_binary/BUILD.bazel @@ -42,12 +42,6 @@ platform_transition_filegroup( }), ) -container_structure_test( - name = "test", - configs = ["test.yaml"], - image = ":platform_image", -) - # $ bazel run //examples/py_binary:load # $ docker run --rm gcr.io/oci_python_hello_world:latest oci_tarball( diff --git a/examples/py_binary/image_test.yaml b/examples/py_binary/image_test.yaml deleted file mode 100644 index 8a403224..00000000 --- a/examples/py_binary/image_test.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# See https://github.com/GoogleContainerTools/container-structure-test#command-tests -schemaVersion: 2.0.0 -metadataTest: - entrypoint: ["/examples/py_binary/say_hello"] -commandTests: - - name: run - command: /examples/py_binary/say_hello - 
expectedOutput: ["hello py_binary!"] diff --git a/py/BUILD.bazel b/py/BUILD.bazel index 0aab4cbe..aa796948 100644 --- a/py/BUILD.bazel +++ b/py/BUILD.bazel @@ -4,7 +4,7 @@ load("@bazel_skylib//rules:common_settings.bzl", "string_flag") # For stardoc to reference the files exports_files(["defs.bzl"]) -# For Bazel 6.x compatibility, since +# For Bazel 6.x compatibility, since # PyRuntimeInfo shipped only with Bazel 7 # Users can set, e.g. --@aspect_rules_py//py:interpreter_version=3.9.18 string_flag( @@ -31,6 +31,7 @@ bzl_library( deps = [ "//py/private:py_binary", "//py/private:py_executable", + "//py/private:py_image_layers", "//py/private:py_library", "//py/private:py_pytest_main", "//py/private:py_unpacked_wheel", diff --git a/py/private/BUILD.bazel b/py/private/BUILD.bazel index 568af5a7..ec69df9f 100644 --- a/py/private/BUILD.bazel +++ b/py/private/BUILD.bazel @@ -111,3 +111,10 @@ bzl_library( srcs = ["virtual.bzl"], visibility = ["//py:__subpackages__"], ) + +bzl_library( + name = "py_image_layers", + srcs = ["py_image_layers.bzl"], + visibility = ["//py:__subpackages__"], + deps = ["@aspect_bazel_lib//lib:tar"], +) diff --git a/py/private/py_image_layers.bzl b/py/private/py_image_layers.bzl index 398d6a03..493b1f9c 100644 --- a/py/private/py_image_layers.bzl +++ b/py/private/py_image_layers.bzl @@ -27,6 +27,7 @@ def py_image_layers(name, binary, interpreter_regex = PY_INTERPRETER_REGEX, site binary: a py_binary target interpreter_regex: a regular expression for use by `grep` which extracts the interpreter and related files from the binary runfiles tree site_packages_regex: a regular expression for use by `grep` which extracts installed packages from the binary runfiles tree + Returns: a list of labels for the layers, which are tar files """