Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP][RFC] New Image Builder Interface #817

Draft
wants to merge 1 commit into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions truss-build/.tool-versions
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
python 3.10.12
poetry 1.5.1
21 changes: 21 additions & 0 deletions truss-build/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# truss-utils

This package is a repository of common functions that help with developing
production AI/ML models with Truss.

## Use

In the `model.py` of your truss, you can do something like the following:

```
from truss_utils.image import pil_to_b64

class Model:
...

def predict(self, model_input):
# call Stable diffusion
...

return pil_to_b64(image)
```
220 changes: 220 additions & 0 deletions truss-build/poetry.lock

Large diffs are not rendered by default.

17 changes: 17 additions & 0 deletions truss-build/pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
[tool.poetry]
name = "truss-build"
version = "0.1.0"
description = "Abstraction to build Docker images for Truss"
authors = ["Truss Maintainers <team@trussml.com>"]
license = "MIT"
readme = "README.md"
packages = [{ include = "truss", from = "./src" }]

[tool.poetry.dependencies]
python = ">=3.10,<3.12"
docker = "^7.0.0"


[build-system]
requires = ["poetry-core"]
build-backend = "poetry.core.masonry.api"
15 changes: 15 additions & 0 deletions truss-build/src/truss/build/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
__version__ = "0.1.0"

from truss.build.image import Image

if __name__ == "__main__":
    # Ad-hoc demo: describe a small image with the fluent API and print the
    # Dockerfile text it serializes to.
    img = (
        Image()
        .apt_install("python3.10-venv")
        .pip_install("numpy", "torch")
        .env({"NOPROXY": "*"})
    )
    print(img.serialize())
12 changes: 12 additions & 0 deletions truss-build/src/truss/build/builder.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
from .image import Image
import tempfile
from pathlib import Path


def build(image: "Image") -> str:
    """Write the Dockerfile for *image* to a temporary file and return its path.

    The file is created with ``delete=False`` so it outlives this call; the
    caller owns it and is responsible for removing it.

    :param image: Object whose ``serialize()`` returns the Dockerfile text.
    :return: Path of the ``.Dockerfile`` temp file that was written.
    """
    with tempfile.NamedTemporaryFile(
        mode="w", delete=False, suffix=".Dockerfile"
    ) as temp_dockerfile:
        # Write through the handle we already hold instead of re-opening the
        # same path while it is still open.
        temp_dockerfile.write(image.serialize())
    dockerfile_path = temp_dockerfile.name
    print(f"Dockerfile created at: {dockerfile_path}")
    return dockerfile_path
93 changes: 93 additions & 0 deletions truss-build/src/truss/build/commands.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
import shlex
from abc import ABC, abstractmethod


# TODO(bola): Support secrets
class Command(ABC):
    """Base class for one Dockerfile instruction.

    Deriving from ``ABC`` is what makes ``@abstractmethod`` effective; on a
    plain class the decorator is inert and ``Command()`` could be instantiated.
    """

    @abstractmethod
    def serialize(self) -> str:
        """Return this instruction as Dockerfile text."""


class FromCommand(Command):
    """Dockerfile ``FROM`` instruction."""

    def __init__(
        self, image: str, tag: str | None = None, AS: str | None = None
    ) -> None:
        """Store the parts of the instruction.

        :param image: Image reference placed right after ``FROM``.
        :param tag: Optional tag, appended to the image as ``:<tag>``.
        :param AS: Optional name, appended as ``AS <name>``.
        """
        self.image = image
        self.tag = tag
        self._as = AS

    def serialize(self) -> str:
        """Return the instruction as one line of Dockerfile text.

        NOTE(review): renaming this method (``to_str`` / ``as_text``) was
        suggested in review; the name is kept so existing callers keep working.
        """
        reference = (
            f"{self.image}" if self.tag is None else f"{self.image}:{self.tag}"
        )
        parts = ["FROM", reference]
        if self._as is not None:
            parts.extend(("AS", f"{self._as}"))
        return " ".join(parts)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: I know that in this context runtime is not really an issue, but I got used to try to always use efficient implementations - unless it's significantly more complex and less readable.

So here you could collect the conditional command in parts: list[str] and at the end " ".join(parts).



class RunCommand(Command):
    """Dockerfile ``RUN`` instruction with optional ``--mount`` flags."""

    def __init__(self, command: str, mounts=None) -> None:
        """Store the shell command and its mounts.

        :param command: Text placed after ``RUN`` (and after any mount flags).
        :param mounts: Optional iterable; each item is emitted as
            ``--mount=<item>`` in iteration order.
        """
        self.command = command
        self.mounts = mounts

    def serialize(self) -> str:
        """Return ``RUN [--mount=... ]<command>`` as one line."""
        mounts = () if self.mounts is None else self.mounts
        # Each flag carries its own trailing space, so the command follows
        # directly after the last one.
        mount_flags = "".join(f"--mount={mount} " for mount in mounts)
        return "RUN " + mount_flags + self.command


class CopyCommand(Command):
    """Dockerfile ``COPY`` instruction with an optional ``--from`` source."""

    # TODO(bola): From should be an image object.
    def __init__(self, src, dst, FROM=None):
        """Remember source, destination and the optional ``--from`` value."""
        self.src, self.dst = src, dst
        self._from = FROM

    def serialize(self):
        """Return ``COPY [--from=<x> ]<src> <dst>``.

        The ``--from`` flag is emitted only when the stored value is truthy.
        """
        origin = f"--from={self._from} " if self._from else ""
        return f"COPY {origin}{self.src} {self.dst}"


class EntrypointCommand(Command):
    """Dockerfile ``ENTRYPOINT`` instruction."""

    def __init__(self, command):
        """Keep the text to place after ``ENTRYPOINT``."""
        self.command = command

    def serialize(self):
        """Return ``ENTRYPOINT <command>``; the command is emitted verbatim."""
        return "ENTRYPOINT {}".format(self.command)


class EnvCommand(Command):
    """Dockerfile ``ENV`` instruction setting one variable."""

    def __init__(self, name: str, value: str) -> None:
        """Store the variable name and its value.

        :param name: Variable name, emitted verbatim.
        :param value: Variable value; escaped when serialized.
        """
        self.name = name
        self.value = value

    def serialize(self) -> str:
        """Return ``ENV <name>="<value>"`` with the value escaped.

        The value is already wrapped in double quotes, so it must not also be
        passed through ``shlex.quote``: that adds single quotes which end up
        as literal characters in the variable (``*`` would become ``'*'``).
        Only backslashes and double quotes are escaped; ``$`` is left alone,
        as it was before this change.
        """
        escaped = self.value.replace("\\", "\\\\").replace('"', '\\"')
        return f'ENV {self.name}="{escaped}"'


class ExposeCommand(Command):
    """Dockerfile ``EXPOSE`` instruction."""

    def __init__(self, ports):
        """Keep the value to place after ``EXPOSE``."""
        self.ports = ports

    def serialize(self):
        """Return ``EXPOSE <ports>``; ``ports`` is formatted as-is."""
        return "EXPOSE {}".format(self.ports)


class VolumeCommand(Command):
    """Dockerfile ``VOLUME`` instruction."""

    def __init__(self, volumes):
        """Keep the value to place after ``VOLUME``."""
        self.volumes = volumes

    def serialize(self):
        """Return ``VOLUME <volumes>``; ``volumes`` is formatted as-is."""
        return "VOLUME {}".format(self.volumes)


class WorkdirCommand(Command):
    """Dockerfile ``WORKDIR`` instruction."""

    def __init__(self, path):
        """Keep the directory to switch to."""
        self.path = path

    def serialize(self):
        """Return ``WORKDIR <path>`` with the path passed through ``shlex.quote``."""
        quoted_path = shlex.quote(self.path)
        return "WORKDIR " + quoted_path
10 changes: 10 additions & 0 deletions truss-build/src/truss/build/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
class InvalidError(Exception):
    """Package exception type; no raise sites are visible in this module."""


class NotFoundError(Exception):
    """Package exception type; no raise sites are visible in this module."""


class RemoteError(Exception):
    """Package exception type; no raise sites are visible in this module."""
165 changes: 165 additions & 0 deletions truss-build/src/truss/build/image.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
from .commands import *
from typing import Dict, List, Optional, Union
from pathlib import Path
from .utils import flatten_str_args, make_pip_install_args
import shlex

# TODO(bola): support secrets


class Image:
def __init__(self) -> None:
self._base_image = ""
self._commands: list[Command] = []
self._base_image: Optional[str] = None
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: To be very precise, class attributes should be type annotated before __init__.

class Image:
    _base_image: Optional[str]
    _commands: list[str]

    def __init__(self) -> None:
        ...

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

when you say class attributes, this means instance variables? They belong to self not cls, right?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, both instance and class variables would be on top level and distinguished by ClassVar. E.g.

class Image:
    _some_data: str  # Instance variable.
    _registry: ClassVar[Registry]  # Class variable

    def __init__(self) -> None:
        ...

See https://peps.python.org/pep-0526/#class-and-instance-variable-annotations


# TODO(support static constructors for different ways of pulling base image)
# Image.from_dockerfile, from_aws_ecr, from_gcp_artifact_registry, from_registry
# also can add common base images: Image.truss_base()....

def serialize(self) -> str:
    """Render the image as Dockerfile text.

    The output is two fixed header comment lines followed by one line per
    stored command, in insertion order; every line ends with a newline.

    NOTE(review): hoisting the two header literals into private constants
    was suggested in review; they stay local so this method is
    self-contained.
    """
    lines = [
        "# syntax = docker/dockerfile:1",  # Support BuildKit
        "# Auto-generated by truss.build, do not edit!",
    ]
    lines.extend(command.serialize() for command in self._commands)
    # Single join instead of repeated ``+=``; the final "\n" keeps the
    # trailing newline the line-by-line version produced.
    return "\n".join(lines) + "\n"

def apt_install(
    self,
    *packages: Union[str, List[str]],
) -> "Image":
    """Install a list of Debian packages using `apt`.

    Adds two ``RUN`` steps: ``apt-get update`` followed by
    ``apt-get install -y <packages>``. Package names are shell-quoted.
    Returns ``self`` unchanged when no packages are given.

    NOTE(review): accepting both varargs and lists mirrors Modal's API;
    whether to keep both forms is still under discussion.

    **Example**

    ```python
    image = Image().apt_install("git")
    ```
    """
    pkgs = flatten_str_args("apt_install", "packages", packages)
    if not pkgs:
        return self
    package_args = " ".join(shlex.quote(pkg) for pkg in pkgs)
    self._commands.extend(
        [
            RunCommand("apt-get update"),
            # RunCommand adds the "RUN " prefix itself; passing it here
            # produced "RUN RUN apt-get install ...".
            RunCommand(f"apt-get install -y {package_args}"),
        ]
    )
    return self

def run_commands(
    self,
    *commands: Union[str, List[str]],
) -> "Image":
    """Append one ``RUN`` step per given shell command and return ``self``.

    Nothing is added when no commands are given.
    """
    cmds = flatten_str_args("run_commands", "commands", commands)
    if cmds:
        for shell_line in cmds:
            self._commands.append(RunCommand(shell_line))
    return self

def workdir(self, path: str) -> "Image":
    """Set the working directory for subsequent image build steps.

    :param path: Directory to use as the workdir.

    **Example**

    ```python
    image = (
        Image()
        .run_commands("git clone https://xyz app")
        .workdir("/app")
        .run_commands("yarn install")
    )
    ```
    """
    step = WorkdirCommand(path)
    self._commands.append(step)
    return self

def env(self, vars: Dict[str, str]) -> "Image":
    """Set environment variables of the image.

    One ``ENV`` step is appended per item, in the mapping's iteration order.

    :param vars: Maps environment variable names to the values to set.

    NOTE(review): annotating ``vars`` as ``typing.Mapping`` was suggested in
    review to signal that it is only read; that needs an extra import.

    **Example**

    ```python
    image = Image().env({"CONDA_OVERRIDE_CUDA": "11.2"})
    ```
    """
    for name, value in vars.items():
        self._commands.append(EnvCommand(name, value))
    return self

def pip_install(
    self,
    *packages: Union[str, List[str]],
    find_links: Optional[str] = None,
    index_url: Optional[str] = None,
    extra_index_url: Optional[str] = None,
    pre: bool = False,
) -> "Image":
    """
    Append a ``RUN python3 -m pip install ...`` step for the given packages.

    Package specifiers are sorted and shell-quoted. Returns ``self``
    unchanged when no packages are given.

    :param packages: Python packages, e.g., ["numpy", "matplotlib>=3.5.0"]
    :param find_links: Passes -f (--find-links) to pip install
    :param index_url: Passes -i (--index-url) to pip install
    :param extra_index_url: Passes --extra-index-url to pip install
    :param pre: Passes --pre (allow pre-releases) to pip install

    **Example**

    ```python
    image = Image().pip_install("click", "httpx~=0.23.3")
    ```
    """
    pkgs = flatten_str_args("pip_install", "packages", packages)
    if not pkgs:
        return self
    extra_args = make_pip_install_args(find_links, index_url, extra_index_url, pre)
    quoted = [shlex.quote(spec) for spec in sorted(pkgs)]
    # Extra pip options, when present, follow the package list.
    suffix = f" {extra_args}" if extra_args else ""
    install_line = f"python3 -m pip install {' '.join(quoted)}{suffix}"
    self._commands.append(RunCommand(install_line))
    return self

def pip_install_from_requirements(
    self,
    requirements_txt: str,
    find_links: Optional[str] = None,
    *,
    index_url: Optional[str] = None,
    extra_index_url: Optional[str] = None,
    pre: bool = False,
) -> "Image":
    """Install a list of Python packages from a local `requirements.txt` file.

    Not implemented yet. It raises instead of silently returning ``None``,
    which would break fluent chaining; this matches the other unimplemented
    methods of this class.

    :param requirements_txt: Path to a requirements.txt file.
    :param find_links: Passes -f (--find-links) to pip install
    :param index_url: Passes -i (--index-url) to pip install
    :param extra_index_url: Passes --extra-index-url to pip install
    :param pre: Passes --pre (allow pre-releases) to pip install
    :raises NotImplementedError: Always.
    """
    raise NotImplementedError()

def copy_local_file(
    self,
    local_path: Union[str, Path],
    remote_path: Union[str, Path] = "./",
) -> "Image":
    """Copy a file into the image as part of building it (not implemented yet).

    Intended to work like [`COPY`](https://docs.docker.com/engine/reference/builder/#copy) in a `Dockerfile`.

    :raises NotImplementedError: Always.
    """
    raise NotImplementedError

def copy_local_dir(
    self,
    local_path: Union[str, Path],
    remote_path: Union[str, Path] = ".",
) -> "Image":
    """Copy a directory into the image as part of building it (not implemented yet).

    Intended to work like [`COPY`](https://docs.docker.com/engine/reference/builder/#copy) in a `Dockerfile`.

    :raises NotImplementedError: Always.
    """
    raise NotImplementedError
Loading
Loading