feat(start): openllm start bento #80

Merged · 5 commits · Jun 27, 2023
14 changes: 14 additions & 0 deletions .github/workflows/cleanup-cache.yml
@@ -1,3 +1,17 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 name: cache-cleanup
 on:
   pull_request:
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
@@ -28,7 +28,8 @@ repos:
     rev: 23.3.0
     hooks:
       - id: black-jupyter
-        files: '/(src|tests|docs|examples|typings)/'
+        files: '/(src|tests|docs|examples|typings|tools)/'
+        language_version: python3.11
   - repo: https://github.com/econchick/interrogate
     rev: 1.5.0
     hooks:
17 changes: 17 additions & 0 deletions changelog.d/80.fix.md
@@ -0,0 +1,17 @@
+Fixes loading logic from a custom path. If a model path is given, OpenLLM
+won't try to import it into the local store.
+
+OpenLLM now only imports and fixes the models to load correctly within the
+bento; see the generated service for more information.
+
+Fixes the service not being ready when serving within a container or on
+BentoCloud. This is related to how the model was previously loaded in the bento.
+
+Falcon loading logic has been reimplemented to fix this major bug. Make sure
+to delete all previously saved weights for Falcon with `openllm prune`.
+
+`openllm start` now supports serving a bento:
+
+```bash
+openllm start llm-bento --help
+```
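
A minimal end-to-end sketch of the workflow this enables; the `openllm build` step and the exact bento tag are illustrative assumptions, only `openllm start llm-bento --help` comes from this changelog:

```bash
# Build a bento for a model first (assumed step; actual tags will differ):
openllm build falcon

# Then serve the built bento directly, instead of a model name:
openllm start llm-bento
```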
6 changes: 6 additions & 0 deletions hatch.toml
@@ -31,6 +31,12 @@ changelog = "towncrier build --version main --draft"
 fmt = "pre-commit run --all-files"
 full = "_run_script --reruns 5 --reruns-delay 3 -r aR {args:tests}"
 setup = "pre-commit install"
+tools = [
+  "./tools/update-readme.py",
+  "./tools/update-optional-dependencies.py",
+  "./tools/update-config-stubs.py",
+  "- ./tools/add-license-headers .",
+]
 typing = "pyright {args:src/openllm tests}"
 [envs.test.overrides]
 env.GITHUB_ACTIONS.env-vars = "COVERAGE_REPORT="
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -63,10 +63,10 @@ all = [
     "openllm[chatglm]",
     "openllm[starcoder]",
     "openllm[falcon]",
-    "openllm[agents]",
-    "openllm[flan-t5]",
     "openllm[fine-tune]",
+    "openllm[agents]",
     "openllm[openai]",
+    "openllm[flan-t5]",
 ]
 chatglm = ["cpm_kernels", "sentencepiece"]
 falcon = ["einops", "xformers", "safetensors"]
46 changes: 26 additions & 20 deletions src/openllm/_configuration.py
@@ -274,10 +274,8 @@ def to_peft_config(self) -> peft.PeftConfig:
         # makes a copy to correctly set all modules
         adapter_config = self.adapter_config.copy()
         if "peft_type" in adapter_config:
-            logger.debug(
-                "'peft_type' is not required as it is managed internally by '%s' and 'peft'.",
-                self.__class__.__name__,
-            )
+            # no need for peft_type since it is internally
+            # managed by OpenLLM and PEFT
             adapter_config.pop("peft_type")

         # respect user set task_type if it is passed, otherwise use one managed by OpenLLM
@@ -1419,13 +1417,10 @@ def __init__(
         )

         for k in _cached_keys:
-            if k in generation_config or attrs.get(k) is None:
+            if k in generation_config or attrs[k] is None:
                 del attrs[k]

-        self.__openllm_extras__ = config_merger.merge(
-            first_not_none(__openllm_extras__, default={}),
-            {k: v for k, v in attrs.items() if k not in self.__openllm_accepted_keys__},
-        )
+        self.__openllm_extras__ = config_merger.merge(first_not_none(__openllm_extras__, default={}), {k: v for k, v in attrs.items() if k not in self.__openllm_accepted_keys__})
         self.generation_config = self["generation_class"](**generation_config)

         # The rest of attrs should only be the attributes to be passed to __attrs_init__
@@ -1664,6 +1659,14 @@ def model_dump(self, flatten: bool = False, **_: t.Any):
     def model_dump_json(self, **kwargs: t.Any):
         return orjson.dumps(self.model_dump(**kwargs))

+    @classmethod
+    def model_construct_json(cls, json_str: str | bytes) -> t.Self:
+        try:
+            attrs = orjson.loads(json_str)
+        except orjson.JSONDecodeError as err:
+            raise openllm.exceptions.ValidationError(f"Failed to load JSON: {err}")
+        return bentoml_cattr.structure(attrs, cls)
+
     @classmethod
     def model_construct_env(cls, **attrs: t.Any) -> t.Self:
         """A helpers that respect configuration values that
@@ -1675,30 +1678,29 @@ def model_construct_env(cls, **attrs: t.Any) -> t.Self:

         env_json_string = os.environ.get(model_config, None)

+        config_from_env: DictStrAny = {}
         if env_json_string is not None:
             try:
                 config_from_env = orjson.loads(env_json_string)
             except orjson.JSONDecodeError as e:
                 raise RuntimeError(f"Failed to parse '{model_config}' as valid JSON string.") from e
-        else:
-            config_from_env = {}
-
-        env_struct = bentoml_cattr.structure(config_from_env, cls)

         if "generation_config" in attrs:
             generation_config = attrs.pop("generation_config")
             if not LazyType(DictStrAny).isinstance(generation_config):
                 raise RuntimeError(f"Expected a dictionary, but got {type(generation_config)}")
         else:
             generation_config = {
-                k: v for k, v in attrs.items() if k in attr.fields_dict(env_struct.__openllm_generation_class__)
+                k: v for k, v in attrs.items() if k in attr.fields_dict(cls.__openllm_generation_class__)
             }

         for k in tuple(attrs.keys()):
             if k in generation_config:
                 del attrs[k]

-        return attr.evolve(env_struct, generation_config=generation_config, **attrs)
+        config_from_env.update(attrs)
+        config_from_env['generation_config'] = generation_config
+        return bentoml_cattr.structure(config_from_env, cls)

     def model_validate_click(self, **attrs: t.Any) -> tuple[LLMConfig, DictStrAny]:
         """Parse given click attributes into a LLMConfig and return the remaining click attributes."""
@@ -1779,7 +1781,12 @@ def peft_task_type(cls) -> str:

 bentoml_cattr.register_unstructure_hook_factory(
     lambda cls: lenient_issubclass(cls, LLMConfig),
-    lambda cls: make_dict_unstructure_fn(cls, bentoml_cattr, _cattrs_omit_if_default=False, _cattrs_use_linecache=True, ),
+    lambda cls: make_dict_unstructure_fn(
+        cls,
+        bentoml_cattr,
+        _cattrs_omit_if_default=False,
+        _cattrs_use_linecache=True,
+    ),
 )


@@ -1796,20 +1803,19 @@ def structure_llm_config(data: DictStrAny, cls: type[LLMConfig]) -> LLMConfig:
     if not LazyType(DictStrAny).isinstance(data):
         raise RuntimeError(f"Expected a dictionary, but got {type(data)}")

-    generation_cls_fields = attr.fields_dict(cls.__openllm_generation_class__)
     cls_attrs = {
-        k: v for k, v in data.items() if k in cls.__openllm_accepted_keys__ and k not in generation_cls_fields
+        k: v for k, v in data.items() if k in cls.__openllm_accepted_keys__
     }
+    generation_cls_fields = attr.fields_dict(cls.__openllm_generation_class__)
     if "generation_config" in data:
         generation_config = data.pop("generation_config")
         if not LazyType(DictStrAny).isinstance(generation_config):
             raise RuntimeError(f"Expected a dictionary, but got {type(generation_config)}")
         config_merger.merge(generation_config, {k: v for k, v in data.items() if k in generation_cls_fields})
     else:
         generation_config = {k: v for k, v in data.items() if k in generation_cls_fields}
-    not_extras = list(cls_attrs) + list(generation_config)
     # The rest should be passed to extras
-    data = {k: v for k, v in data.items() if k not in not_extras}
+    data = {k: v for k, v in data.items() if k not in cls.__openllm_accepted_keys__}

     return cls(generation_config=generation_config, __openllm_extras__=data, **cls_attrs)

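As a usage note for the new `model_construct_json` helper added above, a minimal round-trip sketch might look like the following. `openllm.AutoConfig.for_model` and the `flan-t5` model name are assumptions drawn from the wider codebase, not from this diff:

```python
# Hedged sketch: round-trip an LLMConfig through JSON using the helpers
# touched in this PR. Names outside the diff are assumptions.
import openllm

# Resolve a concrete LLMConfig subclass for a model (assumed AutoConfig API).
config = openllm.AutoConfig.for_model("flan-t5")

# model_dump_json serializes via model_dump + orjson (see the hunk above).
payload = config.model_dump_json()

# model_construct_json is the new inverse: it parses the JSON and structures
# it back into the config class via bentoml_cattr, raising
# openllm.exceptions.ValidationError on malformed input.
restored = type(config).model_construct_json(payload)
assert isinstance(restored, type(config))
```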