diff --git a/.gitignore b/.gitignore index 20dd19a2..fa2c4ec9 100644 --- a/.gitignore +++ b/.gitignore @@ -2,5 +2,5 @@ bazel-* .bazelrc.user .idea .ijwb -.venv +.*.venv/ **/__pycache__ \ No newline at end of file diff --git a/py/defs.bzl b/py/defs.bzl index 5816b784..7f3b6cdb 100644 --- a/py/defs.bzl +++ b/py/defs.bzl @@ -1,5 +1,6 @@ "Public API re-exports" +load("//py/private/venv:venv.bzl", _py_venv = "py_venv") load("//py/private:py_binary.bzl", _py_binary = "py_binary", _py_test = "py_test") load("//py/private:py_library.bzl", _py_library = "py_library") load("//py/private:py_wheel.bzl", "py_wheel_lib") @@ -33,21 +34,16 @@ def py_binary(name, srcs = [], main = None, **kwargs): name = name, srcs = srcs, main = main if main != None else srcs[0], - imports = kwargs.pop("imports", []) + ["."], + imports = kwargs.pop("imports", ["."]), **kwargs ) - native.filegroup( - name = "%s_create_venv_files" % name, - srcs = [name], - tags = ["manual"], - output_group = "create_venv", - ) - - native.sh_binary( + _py_venv( name = "%s.venv" % name, tags = ["manual"], - srcs = [":%s_create_venv_files" % name], + srcs = srcs, + imports = kwargs.pop("imports", ["."]), + **kwargs ) def py_test(name, main = None, srcs = [], **kwargs): @@ -56,21 +52,16 @@ def py_test(name, main = None, srcs = [], **kwargs): name = name, srcs = srcs, main = main if main != None else srcs[0], - imports = kwargs.pop("imports", []) + ["."], + imports = kwargs.pop("imports", ["."]), **kwargs ) - native.filegroup( - name = "%s_create_venv_files" % name, - srcs = [name], - tags = ["manual"], - output_group = "create_venv", - ) - - native.sh_binary( + _py_venv( name = "%s.venv" % name, tags = ["manual"], - srcs = [":%s_create_venv_files" % name], + srcs = srcs, + imports = kwargs.pop("imports", ["."]), + **kwargs ) py_wheel = rule( diff --git a/py/private/BUILD.bazel b/py/private/BUILD.bazel index b8d1aaf6..8fb7be54 100644 --- a/py/private/BUILD.bazel +++ b/py/private/BUILD.bazel @@ -22,6 +22,7 @@ 
bzl_library( deps = [ ":py_library", ":utils", + "//py/private/venv", "@aspect_bazel_lib//lib:paths", ], ) diff --git a/py/private/entry.tmpl.sh b/py/private/entry.tmpl.sh index 3bfd1d31..b22416c2 100644 --- a/py/private/entry.tmpl.sh +++ b/py/private/entry.tmpl.sh @@ -8,6 +8,45 @@ set -o errexit -o nounset -o pipefail PWD=$(pwd) +forget_past_and_set_path () { + # This should detect bash and zsh, which have a hash command that must + # be called to get it to forget past commands. Without forgetting + # past commands the $PATH changes we made may not be respected + if [ -n "${BASH:-}" -o -n "${ZSH_VERSION:-}" ] ; then + hash -r 2> /dev/null + fi +} + +activate_venv () { + local VENV_LOC=$1 + + # Unset the VIRTUAL_ENV env var if one is set + unset VIRTUAL_ENV + VIRTUAL_ENV="${VENV_LOC}" + export VIRTUAL_ENV + + _OLD_PATH="$PATH" + PATH="${VBIN_LOCATION}:$PATH" + export PATH + + forget_past_and_set_path +} + +deactivate_venv () { + # reset old environment variables + if [ -n "${_OLD_PATH:-}" ] ; then + PATH="${_OLD_PATH:-}" + export PATH + unset _OLD_PATH + fi + + forget_past_and_set_path + + unset VIRTUAL_ENV +} + +# Returns an absolute path to the given location if the path is relative, otherwise return +# the path unchanged. function alocation { local P=$1 if [[ "${P:0:1}" == "/" ]]; then @@ -17,108 +56,32 @@ function alocation { fi } -export BAZEL_WORKSPACE_NAME="{{BAZEL_WORKSPACE_NAME}}" - -function wheel_location { - local P=$1 - if [[ "${P:0:3}" == "../" ]]; then - echo $(rlocation "${P:3}") - else - echo $(rlocation "${BAZEL_WORKSPACE_NAME}/${P}") - fi -} - -export -f wheel_location - -# Resolved from the py_interpreter via PyInterpreterInfo. 
-PYTHON_LOCATION="$(rlocation {{PYTHON_INTERPRETER_PATH}})" +PYTHON_LOCATION="$(alocation $(rlocation {{PYTHON_INTERPRETER_PATH}}))" PYTHON="${PYTHON_LOCATION} {{INTERPRETER_FLAGS}}" -PYTHON_BIN_DIR=$(dirname "${PYTHON}") +PYTHON_VERSION=$(${PYTHON} -c 'import platform; print(platform.python_version())') +PYTHON_BIN_DIR=$(dirname "${PYTHON_LOCATION}") PIP_LOCATION="${PYTHON_BIN_DIR}/pip" -PYTHON_SITE_PACKAGES=$(${PYTHON} -c 'import site; print(site.getsitepackages()[0])') -PTH_FILE="$(alocation "$(rlocation {{PTH_FILE}})")" -PIP_FIND_LINKS_SH=$(rlocation {{PIP_FIND_LINKS_SH}}) -PIP_FIND_LINKS=$("${PIP_FIND_LINKS_SH}" | tr '\n' ' ') ENTRYPOINT="$(rlocation {{BINARY_ENTRY_POINT}})" # Convenience vars for the Python virtual env that's created. -RUNFILES_VENV_LOCATION=$(alocation "${RUNFILES_DIR}/{{VENV_NAME}}") -VENV_LOCATION="{{VENV_LOCATION}}" +VENV_SOURCE="$(alocation $(rlocation {{VENV_SOURCE}}))" +VENV_LOCATION="$(alocation ${RUNFILES_DIR}/{{VENV_NAME}})" VBIN_LOCATION="${VENV_LOCATION}/bin" -VPIP_LOCATION="${VBIN_LOCATION}/pip" VPYTHON="${VBIN_LOCATION}/python3 {{INTERPRETER_FLAGS}}" -VPIP="${VPYTHON} -m pip" - -# Create a virtual env to run inside. This allows us to not have to manipulate the PYTHON_PATH to find external -# dependencies. -# We can also now specify the `-I` (isolated) flag to Python, stopping Python from adding the script path to sys.path[0] -# which we have no control over otherwise. -# This does however have some side effects as now all other PYTHON* env vars are ignored. - -# The venv is intentionally created without pip, as when the venv is created with pip, `ensurepip` is used which will -# use the bundled version of pip, which does not match the version of pip bundled with the interpreter distro. -# So we symlink in this ourselves. -VENV_FLAGS=( - "--without-pip" - "--clear" -) -${PYTHON} -m venv "${VENV_LOCATION}" "${VENV_FLAGS[@]}" - -# Activate the venv, disable changing the prompt -export VIRTUAL_ENV_DISABLE_PROMPT=1 -. 
"${VBIN_LOCATION}/activate" -unset VIRTUAL_ENV_DISABLE_PROMPT - -# Now symlink in pip from the toolchain -# Also link to `pip` as well as `pip3`. Python venv will also link `pip3.x`, but this seems unnecessary for this use -ln -snf "${PIP_LOCATION}" "${VPIP_LOCATION}" -ln -snf "${VPIP_LOCATION}" "${VBIN_LOCATION}/pip3" - -# Need to symlink in the pip site-packages folder not just the binary. -# Ask Python where the site-packages folder is and symlink the pip package in from the toolchain -VENV_SITE_PACKAGES=$(${VPYTHON} -c 'import site; print(site.getsitepackages()[0])') -ln -snf "${PYTHON_SITE_PACKAGES}/pip" "${VENV_SITE_PACKAGES}/pip" -ln -snf "${PYTHON_SITE_PACKAGES}/_distutils_hack" "${VENV_SITE_PACKAGES}/_distutils_hack" -ln -snf "${PYTHON_SITE_PACKAGES}/setuptools" "${VENV_SITE_PACKAGES}/setuptools" - -INSTALL_WHEELS={{INSTALL_WHEELS}} -if [ "$INSTALL_WHEELS" = true ]; then - # Call to pip to "install" our dependencies. The `find-links` section in the config points to the external downloaded wheels, - # while `--no-index` ensures we don't reach out to PyPi - # We may hit command line length limits if passing a large number of find-links flags, so set them on the PIP_FIND_LINKS env var - export PIP_FIND_LINKS - - # TODO: This can likely be generated by an action up front, but this is fine for now - read -r -a WHEELS <<< "${PIP_FIND_LINKS}" - REQUIREMENTS_FILE="$(mktemp)" - printf "%s\n" "${WHEELS[@]}" > "${REQUIREMENTS_FILE}" - - PIP_FLAGS=( - "--quiet" - "--no-compile" - "--require-virtualenv" - "--no-input" - "--no-cache-dir" - "--disable-pip-version-check" - "--no-python-version-warning" - "--only-binary=:all:" - "--no-dependencies" - "--no-index" - ) - - ${VPIP} install "${PIP_FLAGS[@]}" -r "${REQUIREMENTS_FILE}" - rm "${REQUIREMENTS_FILE}" - - unset PIP_FIND_LINKS -fi -# Create the site-packages pth file containing all our first party dependency paths. These are from all direct and transitive -# py_library rules. 
-# The .pth file adds to the interpreters sys.path, without having to set `PYTHONPATH`. This allows us to still -# run with the interpreter with the `-I` flag. This stops some import mechanisms breaking out the sandbox by using -# relative imports. -# This is cat'd in so we don't have to have more fun with runfiles symlink paths. -cat "${PTH_FILE}" > "${VENV_SITE_PACKAGES}/first_party.pth" +mkdir "${VENV_LOCATION}" 2>/dev/null || true +ln -snf "${VENV_SOURCE}/include" "${VENV_LOCATION}/include" +ln -snf "${VENV_SOURCE}/lib" "${VENV_LOCATION}/lib" + +mkdir "${VBIN_LOCATION}" 2>/dev/null || true +ln -snf "${VENV_SOURCE}"/bin/* "${VBIN_LOCATION}/" +ln -snf "${PYTHON_LOCATION}" "${VBIN_LOCATION}/python3" + +echo "home = ${VBIN_LOCATION}" > "${VENV_LOCATION}/pyvenv.cfg" +echo "include-system-site-packages = false" >> "${VENV_LOCATION}/pyvenv.cfg" +echo "version = ${PYTHON_VERSION}" >> "${VENV_LOCATION}/pyvenv.cfg" + +activate_venv "${VENV_LOCATION}" # Set all the env vars here, just before we launch {{PYTHON_ENV}} @@ -131,9 +94,7 @@ if [ "$RUN_BINARY_ENTRY_POINT" = true ]; then ${VPYTHON} "${ENTRYPOINT}" -- "$@" fi -# Deactivate the venv -deactivate +deactivate_venv # Unset any set env vars {{PYTHON_ENV_UNSET}} -unset BAZEL_WORKSPACE_NAME diff --git a/py/private/py_binary.bzl b/py/private/py_binary.bzl index 4fa37d20..1428a9d8 100644 --- a/py/private/py_binary.bzl +++ b/py/private/py_binary.bzl @@ -3,129 +3,40 @@ load("@aspect_bazel_lib//lib:paths.bzl", "BASH_RLOCATION_FUNCTION", "to_manifest_path") load("//py/private:py_library.bzl", _py_library = "py_library_utils") load("//py/private:providers.bzl", "PyWheelInfo") -load("//py/private:utils.bzl", "dict_to_exports") - -PY_TOOLCHAIN = "@bazel_tools//tools/python:toolchain_type" -SH_TOOLCHAIN = "@bazel_tools//tools/sh:toolchain_type" - -def _strip_external(path): - return path[len("external/"):] if path.startswith("external/") else path - -def _wheel_path_map(file): - return file.short_path - -def _resolve_toolchain(ctx):
- toolchain_info = ctx.toolchains[PY_TOOLCHAIN] - - if not toolchain_info.py3_runtime: - fail("A py3_runtime must be set on the Python toolchain") - - py3_toolchain = toolchain_info.py3_runtime - - interpreter_path = None - if py3_toolchain.interpreter_path: - interpreter_path = py3_toolchain.interpreter_path - else: - interpreter_path = to_manifest_path(ctx, py3_toolchain.interpreter) - - if interpreter_path == None: - fail("Unable to resolve a path to the Python interperter") - - return struct( - toolchain = py3_toolchain, - path = interpreter_path, - flags = ["-B", "-s", "-I"], - ) +load("//py/private:utils.bzl", "PY_TOOLCHAIN", "SH_TOOLCHAIN", "dict_to_exports", "resolve_toolchain") +load("//py/private/venv:venv.bzl", _py_venv = "py_venv_utils") def _py_binary_rule_imp(ctx): bash_bin = ctx.toolchains[SH_TOOLCHAIN].path - interpreter = _resolve_toolchain(ctx) + interpreter = resolve_toolchain(ctx) main = ctx.file.main - runfiles_files = [] + ctx.files._runfiles_lib + venv_info = _py_venv.make_venv( + ctx, + name = "%s.venv" % ctx.attr.name, + strip_pth_workspace_root = False, + ) - entry = ctx.actions.declare_file(ctx.attr.name) env = dict({ "BAZEL_TARGET": ctx.label, "BAZEL_WORKSPACE": ctx.workspace_name, "BAZEL_TARGET_NAME": ctx.attr.name, }, **ctx.attr.env) - # Get each path to every wheel we need, this includes the transitive wheels - # As these are just filegroups, then we need to dig into the default_runfiles to get the transitive files - # Create a depset for all these - wheels_depsets = [ - target[PyWheelInfo].files - for target in ctx.attr.deps - if PyWheelInfo in target - ] - wheels_depset = depset( - transitive = wheels_depsets, - ) - - # To avoid calling to_list, and then either creating a lot of extra symlinks or adding a large number - # of find-links flags to pip, we can create a conf file and add a file-links section. 
- # Create this via the an args action so we can work directly with the depset - pip_find_links_sh = ctx.actions.declare_file("%s.pip.conf.sh" % ctx.attr.name) - runfiles_files.append(pip_find_links_sh) - - find_links_lines = ctx.actions.args() - - # Note the format here is set to multiline so that each line isn't shell quoted - find_links_lines.set_param_file_format(format = "multiline") - - find_links_lines.add("#!%s" % bash_bin) - find_links_lines.add_all(wheels_depset, map_each = _wheel_path_map, format_each = "echo $(wheel_location %s)") - - ctx.actions.write( - output = pip_find_links_sh, - content = find_links_lines, - ) - - # Create a depset from the `imports` depsets, then pass this to Args to create the `.pth` file. - # This avoids having to call `.to_list` on the depset and taking the perf hit. - # We also need to collect our own "imports" attr. - # Can reuse the helper from py_library, as it's the same process - imports_depset = _py_library.make_imports_depset(ctx) - - pth = ctx.actions.declare_file("%s.pth" % ctx.attr.name) - runfiles_files.append(pth) - - pth_lines = ctx.actions.args() - - # The venv is created at the root of the runfiles tree, in 'VENV_NAME', the full path is "${RUNFILES_DIR}/${VENV_NAME}", - # but depending on if we are running as the top level binary or a tool, then $RUNFILES_DIR may be absolute or relative. - # Paths in the .pth are relative to the site-packages folder where they reside. - # All "import" paths from `py_library` start with the workspace name, so we need to go back up the tree for - # each segment from site-packages in the venv to the root of the runfiles tree. - # Four .. 
will get us back to the root of the venv: - # {name}.runfiles/.{name}.venv/lib/python{version}/site-packages/first_party.pth - escape = ([".."] * 4) - pth_lines.add_all(imports_depset, format_each = "/".join(escape) + "/%s") - - ctx.actions.write( - output = pth, - content = pth_lines, - ) - common_substitutions = { "{{BASH_BIN}}": bash_bin, "{{BASH_RLOCATION_FN}}": BASH_RLOCATION_FUNCTION, - "{{BAZEL_WORKSPACE_NAME}}": ctx.workspace_name, "{{BINARY_ENTRY_POINT}}": to_manifest_path(ctx, main), "{{INTERPRETER_FLAGS}}": " ".join(interpreter.flags), - "{{INTERPRETER_FLAGS_PARTS}}": " ".join(['"%s", ' % f for f in interpreter.flags]), - "{{INSTALL_WHEELS}}": str(len(wheels_depsets) > 0).lower(), - "{{PIP_FIND_LINKS_SH}}": to_manifest_path(ctx, pip_find_links_sh), - "{{PTH_FILE}}": to_manifest_path(ctx, pth), - "{{PYTHON_INTERPRETER_PATH}}": interpreter.path, + "{{PYTHON_INTERPRETER_PATH}}": to_manifest_path(ctx, interpreter.python), "{{RUN_BINARY_ENTRY_POINT}}": "true", - "{{VENV_NAME}}": ".%s.venv" % ctx.attr.name, - "{{VENV_LOCATION}}": "${RUNFILES_VENV_LOCATION}", + "{{VENV_SOURCE}}": to_manifest_path(ctx, venv_info.venv_directory), + "{{VENV_NAME}}": "%s.venv" % ctx.attr.name, "{{PYTHON_ENV}}": "\n".join(dict_to_exports(env)).strip(), "{{PYTHON_ENV_UNSET}}": "\n".join(["unset %s" % k for k in env.keys()]).strip(), } + entry = ctx.actions.declare_file(ctx.attr.name) ctx.actions.expand_template( template = ctx.file._entry, output = entry, @@ -133,30 +44,18 @@ def _py_binary_rule_imp(ctx): is_executable = True, ) - create_venv_bin = ctx.actions.declare_file("%s_create_venv.sh" % ctx.attr.name) - ctx.actions.expand_template( - template = ctx.file._entry, - output = create_venv_bin, - substitutions = dict( - common_substitutions, - **{ - "{{RUN_BINARY_ENTRY_POINT}}": "false", - "{{VENV_LOCATION}}": "${BUILD_WORKSPACE_DIRECTORY}/$@", - } - ), - is_executable = True, - ) - srcs_depset = _py_library.make_srcs_depset(ctx) runfiles = _py_library.make_merged_runfiles( ctx, 
extra_depsets = [ + venv_info.venv_creation_depset, interpreter.toolchain.files, - wheels_depset, srcs_depset, ], - extra_runfiles = runfiles_files, + extra_runfiles = [ + venv_info.venv_directory, + ] + ctx.files._runfiles_lib, extra_runfiles_depsets = [ target[PyWheelInfo].default_runfiles for target in ctx.attr.deps @@ -170,30 +69,32 @@ def _py_binary_rule_imp(ctx): runfiles = runfiles, executable = entry, ), - OutputGroupInfo( - create_venv = [create_venv_bin], - ), # Return PyInfo? ] +_attrs = dict({ + "env": attr.string_dict( + default = {}, + ), + "main": attr.label( + allow_single_file = True, + mandatory = True, + ), + "_entry": attr.label( + allow_single_file = True, + default = "//py/private:entry.tmpl.sh", + ), + "_runfiles_lib": attr.label( + default = "@bazel_tools//tools/bash/runfiles", + ), +}) + +_attrs.update(**_py_venv.attrs) +_attrs.update(**_py_library.attrs) + py_base = struct( implementation = _py_binary_rule_imp, - attrs = dict({ - "env": attr.string_dict( - default = {}, - ), - "main": attr.label( - allow_single_file = True, - mandatory = True, - ), - "_entry": attr.label( - allow_single_file = True, - default = "//py/private:entry.tmpl.sh", - ), - "_runfiles_lib": attr.label( - default = "@bazel_tools//tools/bash/runfiles", - ), - }, **_py_library.attrs), + attrs = _attrs, toolchains = [ SH_TOOLCHAIN, PY_TOOLCHAIN, diff --git a/py/private/py_library.bzl b/py/private/py_library.bzl index 5dbdbd5d..d78fd1d5 100644 --- a/py/private/py_library.bzl +++ b/py/private/py_library.bzl @@ -80,7 +80,6 @@ _attrs = dict({ ), "deps": attr.label_list( allow_files = True, - # Ideally we'd have a PyWheelInfo provider here so we can restrict the dependency set providers = [[PyInfo], [PyWheelInfo]], ), "data": attr.label_list( diff --git a/py/private/py_wheel.bzl b/py/private/py_wheel.bzl index e64f2e41..2d208f16 100644 --- a/py/private/py_wheel.bzl +++ b/py/private/py_wheel.bzl @@ -3,7 +3,7 @@ load("@bazel_skylib//lib:types.bzl", "types") 
load("//py/private:providers.bzl", "PyWheelInfo") -_ATTRS = { +_attrs = { "src": attr.label( allow_files = [".whl"], ), @@ -43,7 +43,7 @@ def _py_wheel_impl(ctx): py_wheel_lib = struct( implementation = _py_wheel_impl, - attrs = _ATTRS, + attrs = _attrs, provides = [PyWheelInfo], make_py_wheel_info = _make_py_wheel_info, ) diff --git a/py/private/utils.bzl b/py/private/utils.bzl index 0be9fa60..ea679bef 100644 --- a/py/private/utils.bzl +++ b/py/private/utils.bzl @@ -1,5 +1,26 @@ +PY_TOOLCHAIN = "@bazel_tools//tools/python:toolchain_type" +SH_TOOLCHAIN = "@bazel_tools//tools/sh:toolchain_type" + def dict_to_exports(env): return [ "export %s=\"%s\"" % (k, v) for (k, v) in env.items() ] + +def resolve_toolchain(ctx): + toolchain_info = ctx.toolchains[PY_TOOLCHAIN] + + if not toolchain_info.py3_runtime: + fail("A py3_runtime must be set on the Python toolchain") + + py3_toolchain = toolchain_info.py3_runtime + + # Callers read `.path` from, and build manifest paths for, the interpreter File, so fail early with a clear message when the runtime has no in-build interpreter. + if not py3_toolchain.interpreter: + fail("The Python toolchain must provide an in-build interpreter, interpreter_path is not supported") + + return struct( + toolchain = py3_toolchain, + python = py3_toolchain.interpreter, + flags = ["-B", "-s", "-I"], + ) diff --git a/py/private/venv/BUILD.bazel b/py/private/venv/BUILD.bazel new file mode 100644 index 00000000..120e6618 --- /dev/null +++ b/py/private/venv/BUILD.bazel @@ -0,0 +1,15 @@ +load("@bazel_skylib//:bzl_library.bzl", "bzl_library") + +exports_files(["venv.tmpl.sh"]) + +bzl_library( + name = "venv", + srcs = ["venv.bzl"], + visibility = ["//py:__subpackages__"], + deps = [ + "//py/private:providers", + "//py/private:py_library", + "//py/private:utils", + "@aspect_bazel_lib//lib:paths", + ], +) diff --git a/py/private/venv/venv.bzl b/py/private/venv/venv.bzl new file mode 100644 index 00000000..cd10c57b --- /dev/null +++ b/py/private/venv/venv.bzl @@ -0,0 +1,222 @@ +load("@aspect_bazel_lib//lib:paths.bzl", "BASH_RLOCATION_FUNCTION", "to_manifest_path") +load("//py/private:providers.bzl", "PyWheelInfo") +load("//py/private:py_library.bzl", _py_library = "py_library_utils") +load("//py/private:utils.bzl", "PY_TOOLCHAIN", "SH_TOOLCHAIN",
"dict_to_exports", "resolve_toolchain") + +def _wheel_path_map(file): + return file.path + +def _pth_import_line_map(line): + # Strip the leading workspace name off the import + return "/".join(line.split("/")[1:]) + +def _get_attr(ctx, attr, override): + if override == None and hasattr(ctx, attr): + return getattr(ctx, attr) + else: + return override + +def _make_venv(ctx, name = None, main = None, strip_pth_workspace_root = None): + bash_bin = ctx.toolchains[SH_TOOLCHAIN].path + interpreter = resolve_toolchain(ctx) + + name = _get_attr(ctx.attr, "name", name) + strip_pth_workspace_root = _get_attr(ctx.attr, "strip_pth_workspace_root", strip_pth_workspace_root) + + # Get each path to every wheel we need, this includes the transitive wheels + # Get each path to every wheel we need, this includes the transitive wheels + # As these are just filegroups, then we need to dig into the default_runfiles to get the transitive files + # Create a depset for all these + wheels_depsets = [ + target[PyWheelInfo].files + for target in ctx.attr.deps + if PyWheelInfo in target + ] + wheels_depset = depset( + transitive = wheels_depsets, + ) + + # To avoid calling to_list, and then either creating a lot of extra symlinks or adding a large number + # of find-links flags to pip, we can create a conf file and add a file-links section. + # Create this via the an args action so we can work directly with the depset + whl_requirements = ctx.actions.declare_file("%s.requirements.txt" % name) + + whl_requirements_lines = ctx.actions.args() + + # Note the format here is set to multiline so that each line isn't shell quoted + whl_requirements_lines.set_param_file_format(format = "multiline") + whl_requirements_lines.add_all(wheels_depset, map_each = _wheel_path_map) + + ctx.actions.write( + output = whl_requirements, + content = whl_requirements_lines, + ) + + # Create a depset from the `imports` depsets, then pass this to Args to create the `.pth` file. 
+ # This avoids having to call `.to_list` on the depset and taking the perf hit. + # We also need to collect our own "imports" attr. + # Can reuse the helper from py_library, as it's the same process + imports_depset = _py_library.make_imports_depset(ctx) + + pth = ctx.actions.declare_file("%s.pth" % name) + + pth_lines = ctx.actions.args() + + # The venv is created at the root of the runfiles tree, in 'VENV_NAME', the full path is "${RUNFILES_DIR}/${VENV_NAME}", + # but depending on if we are running as the top level binary or a tool, then $RUNFILES_DIR may be absolute or relative. + # Paths in the .pth are relative to the site-packages folder where they reside. + # All "import" paths from `py_library` start with the workspace name, so we need to go back up the tree for + # each segment from site-packages in the venv to the root of the runfiles tree. + # Four .. will get us back to the root of the runfiles tree: + # {binary}.runfiles/{binary}.venv/lib/python{version}/site-packages/first_party.pth + escape = ([".."] * 4) + pth_add_all_kwargs = dict({ + "format_each": "/".join(escape) + "/%s", + }) + + # If we are creating a venv for an IDE we likely don't have a folder named after the workspace with everything inside, so strip + # this from the import paths. + # We can't pass variables to the map_each functions, so conditionally add it instead.
+ if strip_pth_workspace_root: + pth_add_all_kwargs.update({ + "map_each": _pth_import_line_map, + }) + + pth_lines.add_all( + imports_depset, + **pth_add_all_kwargs + ) + + ctx.actions.write( + output = pth, + content = pth_lines, + ) + + venv_sibling = _get_attr(ctx.file, "main", main) + venv_directory = ctx.actions.declare_directory("%s.source" % name, sibling = venv_sibling) + + common_substitutions = { + "{{BASH_BIN}}": bash_bin, + "{{BASH_RLOCATION_FN}}": BASH_RLOCATION_FUNCTION, + "{{BAZEL_WORKSPACE_NAME}}": ctx.workspace_name, + "{{INTERPRETER_FLAGS}}": " ".join(interpreter.flags), + "{{INSTALL_WHEELS}}": str(len(wheels_depsets) > 0).lower(), + "{{WHL_REQUIREMENTS_FILE}}": whl_requirements.path, + "{{PTH_FILE}}": pth.path, + "{{PYTHON_INTERPRETER_PATH}}": interpreter.python.path, + "{{VENV_LOCATION}}": venv_directory.path, + "{{USE_MANIFEST_PATH}}": "false", + } + + make_venv_for_action_sh = ctx.actions.declare_file(name + "_venv.sh") + ctx.actions.expand_template( + template = ctx.file._venv_tmpl, + output = make_venv_for_action_sh, + substitutions = common_substitutions, + is_executable = True, + ) + + make_venv_for_ide_sh = ctx.actions.declare_file("%s_create_venv.sh" % name) + ctx.actions.expand_template( + template = ctx.file._venv_tmpl, + output = make_venv_for_ide_sh, + substitutions = dict( + common_substitutions, + **{ + "{{WHL_REQUIREMENTS_FILE}}": to_manifest_path(ctx, whl_requirements), + "{{PTH_FILE}}": to_manifest_path(ctx, pth), + "{{PYTHON_INTERPRETER_PATH}}": interpreter.python.path, + "{{VENV_LOCATION}}": "${BUILD_WORKSPACE_DIRECTORY}/.%s" % name, + "{{USE_MANIFEST_PATH}}": "true", + } + ), + is_executable = True, + ) + + venv_creation_depset = depset( + direct = [make_venv_for_action_sh, pth, whl_requirements], + transitive = [wheels_depset, interpreter.toolchain.files], + ) + + ctx.actions.run_shell( + outputs = [venv_directory], + inputs = venv_creation_depset, + command = make_venv_for_action_sh.path, + tools = [ + 
interpreter.toolchain.files, + ], + progress_message = "Creating virtual environment for %{label}", + mnemonic = "CreateVenv", + ) + + return struct( + venv_directory = venv_directory, + make_venv_for_action_sh = make_venv_for_action_sh, + make_venv_for_ide_sh = make_venv_for_ide_sh, + venv_creation_depset = venv_creation_depset, + ) + +def _py_venv_impl(ctx): + interpreter = resolve_toolchain(ctx) + venv_info = _make_venv(ctx) + + runfiles = _py_library.make_merged_runfiles( + ctx, + extra_depsets = [ + venv_info.venv_creation_depset, + interpreter.toolchain.files, + ], + extra_runfiles = ctx.files._runfiles_lib, + extra_runfiles_depsets = [ + target[PyWheelInfo].default_runfiles + for target in ctx.attr.deps + if PyWheelInfo in target + ], + ) + + return [ + DefaultInfo( + files = depset([ + venv_info.make_venv_for_ide_sh, + ]), + runfiles = runfiles, + executable = venv_info.make_venv_for_ide_sh, + ), + ] + +_common_attrs = dict({ + "_venv_tmpl": attr.label( + allow_single_file = True, + default = "//py/private/venv:venv.tmpl.sh", + ), + "_runfiles_lib": attr.label( + default = "@bazel_tools//tools/bash/runfiles", + ), +}) + +_toolchains = [ + SH_TOOLCHAIN, + PY_TOOLCHAIN, +] + +_attrs = dict({ + "strip_pth_workspace_root": attr.bool( + default = True, + ), +}) + +_attrs.update(**_common_attrs) +_attrs.update(**_py_library.attrs) + +py_venv = rule( + implementation = _py_venv_impl, + attrs = _attrs, + toolchains = _toolchains, + executable = True, +) + +py_venv_utils = struct( + attrs = _common_attrs, + toolchains = _toolchains, + make_venv = _make_venv, +) diff --git a/py/private/venv/venv.tmpl.sh b/py/private/venv/venv.tmpl.sh new file mode 100644 index 00000000..16fd4e48 --- /dev/null +++ b/py/private/venv/venv.tmpl.sh @@ -0,0 +1,162 @@ +#!{{BASH_BIN}} + +USE_MANIFEST_PATH={{USE_MANIFEST_PATH}} + +if [ "$USE_MANIFEST_PATH" = true ]; then + {{BASH_RLOCATION_FN}} + runfiles_export_envvars +fi + +set -o errexit -o nounset -o pipefail + +PWD=$(pwd) + +export 
BAZEL_WORKSPACE_NAME="{{BAZEL_WORKSPACE_NAME}}" + +function alocation { + local P=$1 + if [[ "${P:0:1}" == "/" ]]; then + echo "${P}" + else + echo "${PWD}/${P}" + fi +} + +function maybe_rlocation() { + local P=$1 + if [ "$USE_MANIFEST_PATH" = false ]; then + echo "${P}" + else + local MP + MP=$(rlocation "${P}") + echo "${MP}" + fi +} + +# Resolved from the py_interpreter via PyInterpreterInfo. +PYTHON_LOCATION="{{PYTHON_INTERPRETER_PATH}}" +PYTHON="${PYTHON_LOCATION} {{INTERPRETER_FLAGS}}" +REAL_PYTHON_LOCATION=$(${PYTHON} -c 'import sys; import os; print(os.path.realpath(sys.executable))') +PYTHON_SITE_PACKAGES=$(${PYTHON} -c 'import site; print(site.getsitepackages()[0])') +PYTHON_BIN_DIR=$(${PYTHON} -c 'import sys; import os; print(os.path.dirname(sys.executable))') +PIP_LOCATION="${PYTHON_BIN_DIR}/pip" +PTH_FILE=$(maybe_rlocation "{{PTH_FILE}}") +WHL_REQUIREMENTS_FILE=$(maybe_rlocation "{{WHL_REQUIREMENTS_FILE}}") + +# Convenience vars for the Python virtual env that's created. +VENV_LOCATION="{{VENV_LOCATION}}" +VBIN_LOCATION="${VENV_LOCATION}/bin" +VPIP_LOCATION="${VBIN_LOCATION}/pip" +VPYTHON="${VBIN_LOCATION}/python3 {{INTERPRETER_FLAGS}}" +VPIP="${VPYTHON} -m pip" + +# Create a virtual env to run inside. This allows us to not have to manipulate the PYTHON_PATH to find external +# dependencies. +# We can also now specify the `-I` (isolated) flag to Python, stopping Python from adding the script path to sys.path[0] +# which we have no control over otherwise. +# This does however have some side effects as now all other PYTHON* env vars are ignored. + +# The venv is intentionally created without pip, as when the venv is created with pip, `ensurepip` is used which will +# use the bundled version of pip, which does not match the version of pip bundled with the interpreter distro. +# So we symlink in this ourselves. 
+VENV_FLAGS=( + "--without-pip" + "--clear" + # Setting copies seems to break as venv doesn't copy libs when being forced to do copying rather than symlinks, + # so we do it manually before starting the binary +) + +${PYTHON} -m venv "${VENV_LOCATION}" "${VENV_FLAGS[@]}" + +# Activate the venv, disable changing the prompt +export VIRTUAL_ENV_DISABLE_PROMPT=1 +. "${VBIN_LOCATION}/activate" +unset VIRTUAL_ENV_DISABLE_PROMPT + +# Need to keep track of symlinks created inside the venv that are from outside and remove them after. +# Bazel will fail to validate the tree artifact created otherwise. +VENV_BIN_SYMLINKS=$(find "${VBIN_LOCATION}" -type l) +SYMLINKS=(${VENV_BIN_SYMLINKS}) + +# Now symlink in pip from the toolchain +# Python venv will also link `pip3.x`, but this seems unnecessary for this use +ln -snf "${PIP_LOCATION}" "${VPIP_LOCATION}" +SYMLINKS+=("${VPIP_LOCATION}") + +# Need to symlink in the pip site-packages folder not just the binary. +# Ask Python where the site-packages folder is and symlink the pip package in from the toolchain +VENV_SITE_PACKAGES=$(${VPYTHON} -c 'import site; print(site.getsitepackages()[0])') +ln -snf "${PYTHON_SITE_PACKAGES}/pip" "${VENV_SITE_PACKAGES}/pip" +SYMLINKS+=("${VENV_SITE_PACKAGES}/pip") + +ln -snf "${PYTHON_SITE_PACKAGES}/_distutils_hack" "${VENV_SITE_PACKAGES}/_distutils_hack" +SYMLINKS+=( "${VENV_SITE_PACKAGES}/_distutils_hack") + +ln -snf "${PYTHON_SITE_PACKAGES}/setuptools" "${VENV_SITE_PACKAGES}/setuptools" +SYMLINKS+=( "${VENV_SITE_PACKAGES}/setuptools") + +INSTALL_WHEELS={{INSTALL_WHEELS}} +if [ "$INSTALL_WHEELS" = true ]; then + # Call to pip to "install" our dependencies. 
The `find-links` section in the config points to the external downloaded wheels, + # while `--no-index` ensures we don't reach out to PyPi + # We may hit command line length limits if passing a large number of find-links flags, so set them on the PIP_FIND_LINKS env var + PIP_FIND_LINKS=$(tr '\n' ' ' < "${WHL_REQUIREMENTS_FILE}") + export PIP_FIND_LINKS + + PIP_FLAGS=( + "--quiet" + "--no-compile" + "--require-virtualenv" + "--no-input" + "--no-cache-dir" + "--disable-pip-version-check" + "--no-python-version-warning" + "--only-binary=:all:" + "--no-dependencies" + "--no-index" + ) + + ${VPIP} install "${PIP_FLAGS[@]}" -r "${WHL_REQUIREMENTS_FILE}" + + unset PIP_FIND_LINKS +fi + +# Create the site-packages pth file containing all our first party dependency paths. These are from all direct and transitive +# py_library rules. +# The .pth file adds to the interpreters sys.path, without having to set `PYTHONPATH`. This allows us to still +# run with the interpreter with the `-I` flag. This stops some import mechanisms breaking out the sandbox by using +# relative imports. +cat "${PTH_FILE}" > "${VENV_SITE_PACKAGES}/first_party.pth" + +# Remove the cfg file as it contains absolute paths. +# The entrypoint script for py_binary and py_test will create a new one. +# For local venvs, we'll create a new one below. 
+PYVENV_CFG="${VENV_LOCATION}/pyvenv.cfg" +rm "${PYVENV_CFG}" + +if [ "$USE_MANIFEST_PATH" = false ]; then + # Tear down the symlinks created above as these won't be able to be resolved by bazel when validating the TreeArtifact + for symlink in "${SYMLINKS[@]}"; do + rm "${symlink}" + done +fi + +if [ "$USE_MANIFEST_PATH" = true ]; then + # If we are in a 'bazel run' then remove the symlinks to the execroot Python and replace them with a link to external + rm ${VBIN_LOCATION}/python* + + ln -snf "${REAL_PYTHON_LOCATION}" "${VBIN_LOCATION}/python" + ln -snf "${VBIN_LOCATION}/python" "${VBIN_LOCATION}/python3" + + PYTHON_SYMLINK_VERSION_SUFFIX=$(${PYTHON} -c 'import sys; print(f"{sys.version_info.major}.{sys.version_info.minor}")') + ln -snf "${VBIN_LOCATION}/python" "${VBIN_LOCATION}/python${PYTHON_SYMLINK_VERSION_SUFFIX}" + + PYTHON_VERSION=$(${PYTHON} -c 'import platform; print(platform.python_version())') + echo "home = ${VBIN_LOCATION}" > "${PYVENV_CFG}" + echo "include-system-site-packages = false" >> "${PYVENV_CFG}" + echo "version = ${PYTHON_VERSION}" >> "${PYVENV_CFG}" + + chmod +x "${VBIN_LOCATION}/activate" + chmod +x "${VBIN_LOCATION}/activate.csh" + chmod +x "${VBIN_LOCATION}/activate.fish" +fi \ No newline at end of file diff --git a/py/tests/external-deps/BUILD.bazel b/py/tests/external-deps/BUILD.bazel index adb77a1e..be49f77c 100644 --- a/py/tests/external-deps/BUILD.bazel +++ b/py/tests/external-deps/BUILD.bazel @@ -22,6 +22,7 @@ py_binary( srcs = ["__main__.py"], deps = [ ":lib", + "//examples/foo", "@pypi_django//:wheel", ], ) diff --git a/py/tests/external-deps/__main__.py b/py/tests/external-deps/__main__.py index d8abd683..2ed97429 100644 --- a/py/tests/external-deps/__main__.py +++ b/py/tests/external-deps/__main__.py @@ -1,7 +1,6 @@ import os import site import sys -import django import inspect print(f'Python: {sys.executable}') @@ -16,9 +15,14 @@ print(f'\nEntrypoint Path: {__file__}') +import django print(f'\nDjango location: 
{django.__file__}') print(f'Django version: {django.__version__}') from lib import greet print(f'\nFrom lib with wheel dependency: {greet("Matt")}') print(f'lib filepath: {inspect.getsourcefile(greet)}') + +from foo import get_branding +print(f"From lib in another package: {get_branding()}") +print(f'lib filepath: {inspect.getsourcefile(get_branding)}') diff --git a/py/tests/external-deps/expected b/py/tests/external-deps/expected old mode 100755 new mode 100644 index 27635b79..ec3b30f9 --- a/py/tests/external-deps/expected +++ b/py/tests/external-deps/expected @@ -1,21 +1,25 @@ -Python: (pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/.main.venv/bin/python3 +Python: (pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/main.venv/bin/python3 version: 3.9.10 (main, REDACTED) [Clang 13.0.1 ] version info: sys.version_info(major=3, minor=9, micro=10, releaselevel='final', serial=0) cwd: (pwd) -site-packages folder: ['(pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/.main.venv/lib/python3.9/site-packages'] +site-packages folder: ['(pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/main.venv/lib/python3.9/site-packages'] sys path: (py_toolchain)/lib/python39.zip (py_toolchain)/lib/python3.9 (py_toolchain)/lib/python3.9/lib-dynload -(pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/.main.venv/lib/python3.9/site-packages +(pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/main.venv/lib/python3.9/site-packages (pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/aspect_rules_py/py/tests/external-deps +(pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/aspect_rules_py/examples +(pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/aspect_rules_py/examples/foo Entrypoint Path: (pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/aspect_rules_py/py/tests/external-deps/__main__.py -Django location: 
(pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/.main.venv/lib/python3.9/site-packages/django/__init__.py +Django location: (pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/main.venv/lib/python3.9/site-packages/django/__init__.py Django version: 4.0.2 From lib with wheel dependency: Hello Matt lib filepath: (pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/aspect_rules_py/py/tests/external-deps/lib.py +From lib in another package: rules_py +lib filepath: (pwd)/bazel-out/host/bin/py/tests/external-deps/main.runfiles/aspect_rules_py/examples/foo/__init__.py