Skip to content

Commit

Permalink
Add support for pex (#21)
Browse files Browse the repository at this point in the history
  • Loading branch information
ethanluoyc committed Feb 21, 2024
1 parent 697d764 commit 970ad6e
Show file tree
Hide file tree
Showing 10 changed files with 256 additions and 23 deletions.
8 changes: 0 additions & 8 deletions examples/experimental/pkg/main.py

This file was deleted.

1 change: 1 addition & 0 deletions examples/pex_binary/data.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
hello world!
81 changes: 81 additions & 0 deletions examples/pex_binary/launcher.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,81 @@
#!/usr/bin/env python3
from absl import app
from absl import flags

from lxm3 import xm
from lxm3 import xm_cluster
from lxm3.contrib import ucl

# Command-line flags controlling where and how the example job is launched.
_LAUNCH_ON_CLUSTER = flags.DEFINE_boolean(
    name="launch_on_cluster",
    default=False,
    help="Launch on cluster",
)
_GPU = flags.DEFINE_boolean(
    name="gpu",
    default=False,
    help="If set, use GPU",
)
_SINGULARITY_CONTAINER = flags.DEFINE_string(
    name="container",
    default=None,
    help="Path to singularity container",
)


def main(_):
    """Package ``py_package`` as a PEX and launch a single job that runs it.

    The job runs on the local executor unless ``--launch_on_cluster`` is set,
    requests one GPU when ``--gpu`` is set, and is wrapped in a Singularity
    container when ``--container`` is given.
    """
    with xm_cluster.create_experiment(experiment_title="basic") as experiment:
        job_requirements = (
            xm_cluster.JobRequirements(gpu=1, ram=8 * xm.GB)
            if _GPU.value
            else xm_cluster.JobRequirements(ram=8 * xm.GB)
        )

        if not _LAUNCH_ON_CLUSTER.value:
            executor = xm_cluster.Local(job_requirements)
        else:
            # UCL-specific executor: generic job requirements are translated
            # into SGE-specific ones. Non-UCL users should use
            # `xm_cluster.GridEngine` directly.
            executor = ucl.UclGridEngine(job_requirements, walltime=10 * xm.Min)

        # Ship data.txt (located next to this launcher) alongside the PEX and
        # look up the path the job should use to read it.
        data_source = xm.utils.resolve_path_relative_to_launcher("data.txt")
        resource = xm_cluster.Fileset({data_source: "data.txt"})
        data_path = resource.get_path("data.txt", executor.Spec())  # type: ignore

        spec = xm_cluster.PexBinary(
            # The module that is executed when the PEX starts.
            entrypoint=xm_cluster.ModuleName("py_package.main"),
            # Project directory, given relative to this launcher.
            path=".",
            # Package directories to include in the PEX.
            packages=["py_package"],
            # Extra files placed next to the PEX in the deployed archive.
            dependencies=[resource],
        )

        # A container is optional. Without one, the local executor falls back
        # to the current Python environment and GridEngine uses whatever
        # Python environment the cluster picks up. Relying on the host
        # environment is not recommended for remote execution: chasing
        # dependency problems usually costs more time than writing a simple
        # Dockerfile/Singularity file.
        image_path = _SINGULARITY_CONTAINER.value
        if image_path is not None:
            spec = xm_cluster.SingularityContainer(spec, image_path=image_path)

        packageable = xm.Packageable(spec, executor_spec=executor.Spec())
        (executable,) = experiment.package([packageable])

        job = xm.Job(
            executable=executable,
            executor=executor,
            args={"seed": 1, "data": data_path},
        )
        experiment.add(job)


if __name__ == "__main__":
    # Hand control to absl, which parses the flags defined above and then
    # calls main.
    app.run(main)
Empty file.
17 changes: 17 additions & 0 deletions examples/pex_binary/py_package/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import argparse

# Command-line interface of the example job.
parser = argparse.ArgumentParser(description="A simple example.")
parser.add_argument("--seed", help="Random seed.", type=int)
parser.add_argument("--data", type=str)


def main():
    """Print the parsed arguments followed by the contents of the ``--data`` file."""
    options = parser.parse_args()
    print(options)
    with open(options.data, mode="r") as data_file:
        # The header is printed before reading, so it appears even if the
        # read itself fails.
        print("data file contents:")
        contents = data_file.read()
        print(contents)


if __name__ == "__main__":
    main()
1 change: 1 addition & 0 deletions lxm3/xm_cluster/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from lxm3.xm_cluster.executable_specs import Fileset
from lxm3.xm_cluster.executable_specs import ModuleName
from lxm3.xm_cluster.executable_specs import PDMProject
from lxm3.xm_cluster.executable_specs import PexBinary
from lxm3.xm_cluster.executable_specs import PythonContainer
from lxm3.xm_cluster.executable_specs import PythonPackage
from lxm3.xm_cluster.executable_specs import SingularityContainer
Expand Down
63 changes: 61 additions & 2 deletions lxm3/xm_cluster/executable_specs.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,65 @@ def __attrs_post_init__(self):
)


@attr.s(auto_attribs=True)
class PexBinary(job_blocks.ExecutableSpec):
    """Experimental Python executable packaged as a PEX file.

    Building the executable requires the ``pex`` command to be installed.

    ``PexBinary`` is more flexible than ``PythonPackage`` because it does not
    assume that the project can be packaged as a Python distribution. You
    should be familiar with PEX (https://docs.pex-tool.org/) before using it.

    Notes:
        A few assumptions are currently hardcoded in LXM3:

        1. No dependencies are bundled inside the PEX. This avoids bundling
           large dependencies such as TensorFlow. Dependencies are expected to
           be available in the runtime environment instead (e.g. via a
           Singularity container). The PEX is therefore built with
           ``--inherit-path=fallback``.
        2. Caching of extracted PEXes is disabled, to avoid accumulating a
           large number of files in the default cache directory. The PEX is
           always extracted to ``./.pex`` in the runtime root, which is a
           scratch directory.

    Args:
        entrypoint: Entrypoint for the built executable. Only
            :obj:`ModuleName` is supported.
        path: Path to the project. A relative path is resolved relative to the
            launcher's working directory. This path is used as the working
            directory when building the PEX.
        packages: Packages to include in the PEX. Packages are directories
            containing Python code; each one is passed to the ``pex`` command
            as ``--package <package>``. Packages nested in a subdirectory use
            the syntax described in the PEX documentation.
        modules: Modules to include in the PEX. Modules are single Python
            files; each one is passed to the ``pex`` command as
            ``--module <module>``. Modules nested in a subdirectory use the
            syntax described in the PEX documentation.
        dependencies: Filesets to ship with the executable. They are placed in
            the archive created by LXM3 next to the PEX rather than bundled
            inside the PEX (via ``-D``). Useful for configuration files, etc.

    TODOs:
        Expose more options from the pex command line interface.

    Examples:
        See ``examples/pex_binary`` for an example.
    """

    entrypoint: ModuleName
    path: str = attr.ib(default=".", converter=utils.resolve_path_relative_to_launcher)
    packages: List[str] = attr.ib(factory=list, converter=list, kw_only=True)
    modules: List[str] = attr.ib(factory=list, converter=list, kw_only=True)
    dependencies: List[Fileset] = attr.ib(factory=list, converter=list)

    @property
    def name(self) -> str:
        """Name of the executable, derived from ``path`` by ``name_from_path``."""
        return name_from_path(self.path)


@attr.s(auto_attribs=True)
class SingularityContainer(job_blocks.ExecutableSpec):
"""An executable that can be executed in a Singularity container.
Expand All @@ -196,7 +255,7 @@ class SingularityContainer(job_blocks.ExecutableSpec):
to the launcher's working directory.
"""

entrypoint: Union[UniversalPackage, PythonPackage]
entrypoint: Union[UniversalPackage, PythonPackage, PexBinary]
image_path: str

@property
Expand Down Expand Up @@ -262,7 +321,7 @@ class DockerContainer(job_blocks.ExecutableSpec):
image: Name of the Docker image.
"""

entrypoint: Union[UniversalPackage, PythonPackage]
entrypoint: Union[UniversalPackage, PythonPackage, PexBinary]
image: str

@property
Expand Down
69 changes: 69 additions & 0 deletions lxm3/xm_cluster/packaging/cluster.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import contextlib
import os
import shutil
import subprocess
import tempfile
from typing import Any

Expand All @@ -12,6 +15,7 @@
from lxm3.xm_cluster import config as config_lib
from lxm3.xm_cluster import executable_specs as cluster_executable_specs
from lxm3.xm_cluster import executables as cluster_executables
from lxm3.xm_cluster.console import console
from lxm3.xm_cluster.packaging import archive_builder
from lxm3.xm_cluster.packaging import container_builder

Expand Down Expand Up @@ -49,6 +53,70 @@ def _package_python_package(
)


@contextlib.contextmanager
def _chdir(directory):
    """Temporarily make ``directory`` the process working directory.

    The previous working directory is restored when the ``with`` block exits,
    whether it finishes normally or raises.

    Args:
        directory: Path to switch into for the duration of the ``with`` block.

    Yields:
        None.
    """
    previous_cwd = os.getcwd()
    os.chdir(directory)
    try:
        yield
    finally:
        # Restore on the error path too; otherwise a failure inside the block
        # leaves the whole process in `directory`.
        os.chdir(previous_cwd)


def _package_pex_binary(
    spec: cluster_executable_specs.PexBinary,
    packageable: xm.Packageable,
    artifact_store: artifacts.ArtifactStore,
):
    """Build a PEX for ``spec``, zip it with its resources and deploy the archive.

    The ``pex`` command is run with ``spec.path`` as the working directory.
    The resulting ``<name>.pex`` and every file listed in
    ``spec.dependencies`` are staged in a temporary directory, zipped, and
    handed to ``artifact_store.deploy_resource_archive``.

    Args:
        spec: The PEX executable specification to build.
        packageable: Supplies the ``args`` and ``env_vars`` attached to the
            returned command.
        artifact_store: Store used to deploy the zipped archive.

    Returns:
        A ``cluster_executables.Command`` whose entrypoint is ``./<name>.pex``
        and whose resource URI is the deployed archive path.

    Raises:
        RuntimeError: If the ``pex`` executable cannot be found on ``PATH``.
        ValueError: If a dependency would overwrite a file that is already
            staged.
        subprocess.CalledProcessError: If the ``pex`` command exits with a
            non-zero status.
    """
    pex_executable = shutil.which("pex")
    if pex_executable is None:
        # Raised explicitly because an `assert` is stripped under `python -O`.
        raise RuntimeError("pex executable not found")

    pex_name = f"{spec.name}.pex"

    with tempfile.TemporaryDirectory() as staging:
        install_dir = os.path.join(staging, "install")
        pex_path = os.path.join(install_dir, pex_name)
        with _chdir(spec.path):
            pex_options = []
            for package in spec.packages:
                pex_options.extend(["--package", package])
            for module in spec.modules:
                pex_options.extend(["--module", module])
            # Dependencies are not bundled in the PEX; fall back to the
            # packages available in the runtime environment.
            pex_options.extend(["--inherit-path=fallback"])
            pex_options.extend(["--entry-point", spec.entrypoint.module_name])
            # Extract next to the job instead of the default PEX cache.
            pex_options.extend(["--runtime-pex-root=./.pex"])
            with console.status(f"Creating pex {pex_name}"):
                pex_cmd = [pex_executable, "-o", pex_path, *pex_options]
                console.log(f"Running pex command: {' '.join(pex_cmd)}")
                subprocess.run(pex_cmd, check=True)

        # Add resources to the archive
        for resource in spec.dependencies:
            for src, dst in resource.files:
                target_file = os.path.join(install_dir, dst)
                if os.path.exists(target_file):
                    raise ValueError(
                        f"Additional resource overwrites existing file: {src}"
                    )
                os.makedirs(os.path.dirname(target_file), exist_ok=True)
                shutil.copy(src, target_file)

        local_archive_path = shutil.make_archive(
            os.path.join(staging, spec.name), "zip", install_dir
        )
        push_archive_name = os.path.basename(local_archive_path)
        deployed_archive_path = artifact_store.deploy_resource_archive(
            local_archive_path, push_archive_name
        )

    return cluster_executables.Command(
        entrypoint_command=f"./{pex_name}",
        resource_uri=deployed_archive_path,
        name=spec.name,
        args=packageable.args,
        env_vars=packageable.env_vars,
    )


def _package_universal_package(
universal_package: cluster_executable_specs.UniversalPackage,
packageable: xm.Packageable,
Expand Down Expand Up @@ -173,6 +241,7 @@ def _throw_on_unknown_executable(

_PACKAGING_ROUTER = pattern_matching.match(
_package_python_package,
_package_pex_binary,
_package_universal_package,
_package_pdm_project,
_package_python_container,
Expand Down
14 changes: 12 additions & 2 deletions pdm.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

25 changes: 14 additions & 11 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ build-backend = "pdm.backend"
name = "lxm3"
description = 'LXM3: XManager launch backend for HPC clusters'
readme = "README.md"
requires-python = ">=3.9"
requires-python = ">=3.9,<3.13"
license = {text = "MIT"}
keywords = ["machine-learning", "hpc", "packaging", "singularity"]
authors = [{ name = "Yicheng Luo", email = "ethanluoyc@gmail.com" }]
Expand Down Expand Up @@ -50,19 +50,22 @@ lxm3 = "lxm3.cli.cli:entrypoint"

[project.optional-dependencies]
shell = ["IPython"]
pex = [
"pex>2.1.139",
]

[tool.pdm.dev-dependencies]
dev = [
"coverage[toml]>=6.5",
"pytest",
"pytest-cov",
"pre-commit",
"fabric",
"Sphinx",
"myst-parser",
"sphinx_nameko_theme",
"black>=23.1.0",
"ruff>=0.0.243",
"coverage[toml]>=6.5",
"pytest",
"pytest-cov",
"pre-commit",
"fabric",
"Sphinx",
"myst-parser",
"sphinx_nameko_theme",
"black>=23.1.0",
"ruff>=0.0.243",
]

[tool.pdm.version]
Expand Down

0 comments on commit 970ad6e

Please sign in to comment.