3 changes: 3 additions & 0 deletions changelog.d/141.fix.md
@@ -0,0 +1,3 @@
Fixes model location resolution when running within a Bento container

This ensures that tags and the model path are inferred correctly based on BENTO_PATH and /.dockerenv.
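A minimal sketch of the detection this entry describes (the helper name is illustrative, not the actual OpenLLM API):

    import os

    def running_in_bento_container() -> bool:
        # Assumption: a Bento container exports BENTO_PATH, and Docker creates
        # the /.dockerenv marker at the container root, as noted above.
        return "BENTO_PATH" in os.environ and os.path.exists("/.dockerenv")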
5 changes: 1 addition & 4 deletions pyproject.toml
@@ -305,10 +305,7 @@ reportUnknownVariableType = "warning"
typeCheckingMode = "strict"

[tool.mypy]
# TODO: remove all of the disable to ensure strict type
disable_error_code = ["attr-defined", "name-defined", "annotation-unchecked"]
enable_error_code = ["redundant-expr"]
exclude = ["examples/", "tools/", "tests/", "src/openllm/playground/"]
exclude = ["src/openllm/playground/"]
files = ["src/openllm", "src/openllm_client"]
local_partial_types = true
mypy_path = "typings"
133 changes: 22 additions & 111 deletions src/openllm/_llm.py

Large diffs are not rendered by default.

32 changes: 7 additions & 25 deletions src/openllm/bundle/_package.py
@@ -23,7 +23,6 @@
import fs.copy
import fs.errors
import orjson
from packaging.version import Version
from simple_di import Provide
from simple_di import inject

@@ -76,23 +75,13 @@ def build_editable(path: str) -> str | None:
return builder.build("wheel", path, config_settings={"--global-option": "--quiet"})
raise RuntimeError("Custom OpenLLM build is currently not supported. Please install OpenLLM from PyPI or build it from the Git source.")


def handle_package_version(package: str, has_dockerfile_template: bool, lower_bound: bool = True):
version = Version(pkg.get_pkg_version(package))
if version.is_devrelease:
if has_dockerfile_template: logger.warning("Installed %s has version %s as a dev release. This means you have a custom build of %s with %s. Make sure to use custom dockerfile templates (--dockerfile-template) to setup %s correctly. See https://docs.bentoml.com/en/latest/guides/containerization.html#dockerfile-template for more information.", package, version, package, "CUDA support" if "cu" in str(version) else "more features", package)
return package
return f"{package}>={importlib.metadata.version(package)}" if lower_bound else package


def construct_python_options(
llm: openllm.LLM[t.Any, t.Any],
llm_fs: FS,
has_dockerfile_template: bool,
extra_dependencies: tuple[str, ...] | None = None,
adapter_map: dict[str, str | None] | None = None,
) -> PythonOptions:
packages = ["openllm"]
packages = ["openllm", "scipy"] # apparently bnb misses this one
if adapter_map is not None: packages += ["openllm[fine-tune]"]
# NOTE: add openllm to the default dependencies
# if users has openllm custom built wheels, it will still respect
@@ -102,14 +91,13 @@ def construct_python_options(

req = llm.config["requirements"]
if req is not None: packages.extend(req)

if str(os.environ.get("BENTOML_BUNDLE_LOCAL_BUILD", False)).lower() == "false": packages.append(f"bentoml>={'.'.join([str(i) for i in pkg.pkg_version_info('bentoml')])}")
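# e.g. with bentoml 1.0.22 installed (version illustrative), pkg_version_info yields (1, 0, 22)
# and the appended requirement becomes 'bentoml>=1.0.22'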

env: EnvVarMixin = llm.config["env"]
env = llm.config["env"]
framework_envvar = env["framework_value"]
if framework_envvar == "flax":
if not is_flax_available(): raise ValueError(f"Flax is not available, while {env.framework} is set to 'flax'")
packages.extend([handle_package_version("flax", has_dockerfile_template), handle_package_version("jax", has_dockerfile_template), handle_package_version("jaxlib", has_dockerfile_template)])
packages.extend([f"flax>={importlib.metadata.version('flax')}", f"jax>={importlib.metadata.version('jax')}", f"jaxlib>={importlib.metadata.version('jaxlib')}"])
elif framework_envvar == "tf":
if not is_tf_available(): raise ValueError(f"TensorFlow is not available, while {env.framework} is set to 'tf'")
candidates = (
@@ -127,7 +115,7 @@
# For the metadata, we have to look for both tensorflow and tensorflow-cpu
for candidate in candidates:
try:
pkgver = handle_package_version(candidate, has_dockerfile_template)
_tf_version = importlib.metadata.version(candidate)
@@ -136,14 +124,12 @@
except importlib.metadata.PackageNotFoundError: pass
else:
if not is_torch_available(): raise ValueError("PyTorch is not available. Make sure to have it locally installed.")
packages.extend([handle_package_version("torch", has_dockerfile_template)])
packages.append(f"torch>={importlib.metadata.version('torch')}")

wheels: list[str] = []
built_wheels = build_editable(llm_fs.getsyspath("/"))
if built_wheels is not None: wheels.append(llm_fs.getsyspath(f"/{built_wheels.split('/')[-1]}"))

return PythonOptions(packages=packages, wheels=wheels, lock_packages=False)

return PythonOptions(packages=packages, wheels=wheels, lock_packages=False, extra_index_url=["https://download.pytorch.org/whl/cu118"])
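# Note: extra_index_url points pip at the PyTorch CUDA 11.8 wheel index, roughly equivalent to
# `pip install torch --extra-index-url https://download.pytorch.org/whl/cu118`.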

def construct_docker_options(
llm: openllm.LLM[t.Any, t.Any],
@@ -164,7 +150,6 @@ def construct_docker_options(
]
_bentoml_config_options += (" " if _bentoml_config_options else "") + " ".join(_bentoml_config_options_opts)
env: EnvVarMixin = llm.config["env"]

env_dict = {
env.framework: env.framework_value,
env.config: f"'{llm.config.model_dump_json().decode()}'",
@@ -175,7 +160,6 @@
"BENTOML_CONFIG_OPTIONS": f"'{_bentoml_config_options}'",
env.model_id: f"/home/bentoml/bento/models/{llm.tag.path()}", # This is the default BENTO_PATH var
}

if adapter_map: env_dict["BITSANDBYTES_NOWELCOME"] = os.environ.get("BITSANDBYTES_NOWELCOME", "1")
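# Assumption: BITSANDBYTES_NOWELCOME=1 suppresses the banner bitsandbytes prints on import.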

# We need to handle None separately here, as env from subprocess doesn't accept None value.
@@ -184,7 +168,6 @@
if _env.bettertransformer_value is not None: env_dict[_env.bettertransformer] = str(_env.bettertransformer_value)
if _env.quantize_value is not None: env_dict[_env.quantize] = _env.quantize_value
env_dict[_env.runtime] = _env.runtime_value

return DockerOptions(
cuda_version="11.8.0",
env=env_dict,
@@ -193,7 +176,6 @@
python_version="3.9",
)


@inject
def create_bento(
bento_tag: bentoml.Tag,
@@ -235,7 +217,7 @@ def create_bento(
description=f"OpenLLM service for {llm.config['start_name']}",
include=list(llm_fs.walk.files()),
exclude=["/venv", "/.venv", "__pycache__/", "*.py[cod]", "*$py.class"],
python=construct_python_options(llm, llm_fs, dockerfile_template is None, extra_dependencies, adapter_map),
python=construct_python_options(llm, llm_fs, extra_dependencies, adapter_map),
docker=construct_docker_options(llm, llm_fs, workers_per_resource, quantize, bettertransformer, adapter_map, dockerfile_template, runtime, serialisation_format),
models=[llm_spec],
)
16 changes: 6 additions & 10 deletions src/openllm/cli.py
@@ -1281,12 +1281,11 @@ def build_command(
llm_fs.makedir(src_folder_name, recreate=True)
fs.copy.copy_dir(src_fs, _adapter_id, llm_fs, src_folder_name)
adapter_map[src_folder_name] = name
except FileNotFoundError:
# this is the remote adapter, then just added back
# note that there is a drawback here. If the path of the local adapter
# path have the same name as the remote, then we currently don't support
# that edge case.
adapter_map[_adapter_id] = name
# this is a remote adapter, so just add it back.
# note a drawback here: if a local adapter path has
# the same name as the remote adapter id, we currently
# don't support that edge case.
except FileNotFoundError: adapter_map[_adapter_id] = name
os.environ["OPENLLM_ADAPTER_MAP"] = orjson.dumps(adapter_map).decode()
bento_tag = bentoml.Tag.from_taglike(f"{llm.llm_type}-service:{llm.tag.version}".lower().strip())
try:
@@ -1332,8 +1331,6 @@ def build_command(
elif not overwrite: _echo(f"'{model_name}' already has a Bento built [{bento}]. To overwrite it, pass '--overwrite'.", fg="yellow")
_echo(
"📖 Next steps:\n\n"
+ "* Serving BentoLLM locally with 'openllm start':\n"
+ f" $ openllm start {bento.tag}\n\n"
+ "* Push to BentoCloud with 'bentoml push':\n"
+ f" $ bentoml push {bento.tag}\n\n"
+ "* Containerize your Bento with 'bentoml containerize':\n"
@@ -1350,10 +1347,9 @@
if push: BentoMLContainer.bentocloud_client.get().push_bento(bento, context=t.cast(CliContext, ctx.obj).cloud_context)
elif containerize:
backend = t.cast("DefaultBuilder", os.getenv("BENTOML_CONTAINERIZE_BACKEND", "docker"))
_echo(f"Building {bento} into a LLMContainer using backend '{backend}'", fg="magenta")
try: bentoml.container.health(backend)
except subprocess.CalledProcessError: raise OpenLLMException(f"Failed to use backend {backend}") from None
bentoml.container.build(bento.tag, backend=backend, features=("grpc",))
bentoml.container.build(bento.tag, backend=backend, features=("grpc", "io"))
return bento
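# A sketch of the equivalent manual flow (backend value illustrative):
#   bentoml.container.health("docker")   # raises subprocess.CalledProcessError if the backend is unusable
#   bentoml.container.build(bento.tag, backend="docker", features=("grpc", "io"))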


13 changes: 4 additions & 9 deletions src/openllm/serialisation/__init__.py
@@ -59,12 +59,7 @@
transformers = LazyLoader("transformers", globals(), "transformers")


def import_model(
llm: openllm.LLM[t.Any, t.Any],
*decls: t.Any,
trust_remote_code: bool,
**attrs: t.Any,
) -> bentoml.Model:
def import_model(llm: openllm.LLM[M, T], *decls: t.Any, trust_remote_code: bool, **attrs: t.Any) -> bentoml.Model:
if llm.runtime == "transformers":
return openllm.transformers.import_model(llm, *decls, trust_remote_code=trust_remote_code, **attrs)
elif llm.runtime == "ggml":
@@ -73,7 +68,7 @@ def import_model(
raise ValueError(f"Unknown runtime: {llm.config['runtime']}")


def get(llm: openllm.LLM[t.Any, t.Any], auto_import: bool = False) -> bentoml.Model:
def get(llm: openllm.LLM[M, T], auto_import: bool = False) -> bentoml.Model:
if llm.runtime == "transformers":
return openllm.transformers.get(llm, auto_import=auto_import)
elif llm.runtime == "ggml":
@@ -82,7 +77,7 @@ def get(llm: openllm.LLM[t.Any, t.Any], auto_import: bool = False) -> bentoml.Mo
raise ValueError(f"Unknown runtime: {llm.config['runtime']}")


def save_pretrained(llm: openllm.LLM[t.Any, t.Any], save_directory: str, **attrs: t.Any) -> None:
def save_pretrained(llm: openllm.LLM[M, T], save_directory: str, **attrs: t.Any) -> None:
if llm.runtime == "transformers":
return openllm.transformers.save_pretrained(llm, save_directory, **attrs)
elif llm.runtime == "ggml":
@@ -91,7 +86,7 @@ def save_pretrained(llm: openllm.LLM[t.Any, t.Any], save_directory: str, **attrs
raise ValueError(f"Unknown runtime: {llm.config['runtime']}")


def load_model(llm: openllm.LLM[M, t.Any], *decls: t.Any, **attrs: t.Any) -> M:
def load_model(llm: openllm.LLM[M, T], *decls: t.Any, **attrs: t.Any) -> M:
if llm.runtime == "transformers":
return openllm.transformers.load_model(llm, *decls, **attrs)
elif llm.runtime == "ggml":
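The move from LLM[t.Any, t.Any] to LLM[M, T] in these signatures lets type checkers carry the concrete model type through the serialisation helpers. A minimal sketch of the pattern, assuming M and T are the module's model and tokenizer TypeVars:

    import typing as t

    M = t.TypeVar("M")  # model type, e.g. a transformers.PreTrainedModel subclass
    T = t.TypeVar("T")  # tokenizer type

    class LLM(t.Generic[M, T]): ...

    def load_model(llm: LLM[M, T], *decls: t.Any, **attrs: t.Any) -> M:
        # Returning M instead of t.Any preserves the concrete model class for
        # callers, so downstream code is no longer typed as Any.
        ...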