feat(start): starting bento and fix load (#80)
aarnphm committed Jun 27, 2023
1 parent 77cb851 commit db1494a
Showing 26 changed files with 838 additions and 439 deletions.
14 changes: 14 additions & 0 deletions .github/workflows/cleanup-cache.yml
@@ -1,3 +1,17 @@
+# Copyright 2023 BentoML Team. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
name: cache-cleanup
on:
pull_request:
3 changes: 2 additions & 1 deletion .pre-commit-config.yaml
@@ -28,7 +28,8 @@ repos:
rev: 23.3.0
hooks:
- id: black-jupyter
-files: '/(src|tests|docs|examples|typings)/'
+files: '/(src|tests|docs|examples|typings|tools)/'
+language_version: python3.11
- repo: https://github.com/econchick/interrogate
rev: 1.5.0
hooks:
17 changes: 17 additions & 0 deletions changelog.d/80.fix.md
@@ -0,0 +1,17 @@
+Fixes loading logic from a custom path. If a model path is given, OpenLLM
+won't try to import it into the local store.
+
+OpenLLM now only imports and fixes up the models so that they load correctly
+within the bento; see the generated service for more information.
+
+Fixes the service not being ready when serving within a container or on
+BentoCloud, caused by how the model was previously loaded into the bento.
+
+Falcon loading logic has been reimplemented to fix this major bug. Make sure
+to delete all previously saved Falcon weights with `openllm prune`.
+
+`openllm start` now supports bentos:
+
+```bash
+openllm start llm-bento --help
+```
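
The custom-path behavior above is easiest to see from the CLI. A minimal sketch, assuming locally downloaded Falcon weights (the directory is hypothetical; `--model-id` is OpenLLM's flag for pointing at a model source):

```bash
# With this fix, a local path is served in place rather than re-imported
# into the local BentoML model store.
openllm start falcon --model-id /path/to/falcon-weights
```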
6 changes: 6 additions & 0 deletions hatch.toml
@@ -31,6 +31,12 @@ changelog = "towncrier build --version main --draft"
fmt = "pre-commit run --all-files"
full = "_run_script --reruns 5 --reruns-delay 3 -r aR {args:tests}"
setup = "pre-commit install"
+tools = [
+"./tools/update-readme.py",
+"./tools/update-optional-dependencies.py",
+"./tools/update-config-stubs.py",
+"- ./tools/add-license-headers .",
+]
typing = "pyright {args:src/openllm tests}"
[envs.test.overrides]
env.GITHUB_ACTIONS.env-vars = "COVERAGE_REPORT="
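The new `tools` entry defines a hatch script group chaining the repository's maintenance scripts. A minimal sketch of invoking it, assuming the scripts table belongs to hatch's default environment:

```bash
# Runs each tools/ script in sequence; the leading "-" on the last entry
# tells hatch to ignore that command's exit code.
hatch run tools
```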
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -63,10 +63,10 @@ all = [
"openllm[chatglm]",
"openllm[starcoder]",
"openllm[falcon]",
"openllm[agents]",
"openllm[flan-t5]",
"openllm[fine-tune]",
"openllm[agents]",
"openllm[openai]",
"openllm[flan-t5]",
]
chatglm = ["cpm_kernels", "sentencepiece"]
falcon = ["einops", "xformers", "safetensors"]
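The reordering above only shuffles the `all` meta-extra's member list; installation is unchanged. For reference:

```bash
# Install every model extra at once, or cherry-pick individual extras.
pip install "openllm[all]"
pip install "openllm[falcon,fine-tune]"
```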
46 changes: 26 additions & 20 deletions src/openllm/_configuration.py
@@ -274,10 +274,8 @@ def to_peft_config(self) -> peft.PeftConfig:
# makes a copy to correctly set all modules
adapter_config = self.adapter_config.copy()
if "peft_type" in adapter_config:
-logger.debug(
-"'peft_type' is not required as it is managed internally by '%s' and 'peft'.",
-self.__class__.__name__,
-)
+# no need for peft_type since it is internally
+# managed by OpenLLM and PEFT
adapter_config.pop("peft_type")

# respect user set task_type if it is passed, otherwise use one managed by OpenLLM
@@ -1419,13 +1417,10 @@ def __init__(
)

for k in _cached_keys:
-if k in generation_config or attrs.get(k) is None:
+if k in generation_config or attrs[k] is None:
del attrs[k]

-self.__openllm_extras__ = config_merger.merge(
-first_not_none(__openllm_extras__, default={}),
-{k: v for k, v in attrs.items() if k not in self.__openllm_accepted_keys__},
-)
+self.__openllm_extras__ = config_merger.merge(first_not_none(__openllm_extras__, default={}), {k: v for k, v in attrs.items() if k not in self.__openllm_accepted_keys__})
self.generation_config = self["generation_class"](**generation_config)

# The rest of attrs should only be the attributes to be passed to __attrs_init__
@@ -1664,6 +1659,14 @@ def model_dump(self, flatten: bool = False, **_: t.Any):
def model_dump_json(self, **kwargs: t.Any):
return orjson.dumps(self.model_dump(**kwargs))

+@classmethod
+def model_construct_json(cls, json_str: str | bytes) -> t.Self:
+try:
+attrs = orjson.loads(json_str)
+except orjson.JSONDecodeError as err:
+raise openllm.exceptions.ValidationError(f"Failed to load JSON: {err}")
+return bentoml_cattr.structure(attrs, cls)
+
@classmethod
def model_construct_env(cls, **attrs: t.Any) -> t.Self:
"""A helpers that respect configuration values that
@@ -1675,30 +1678,29 @@ def model_construct_env(cls, **attrs: t.Any) -> t.Self:

env_json_string = os.environ.get(model_config, None)

+config_from_env: DictStrAny = {}
if env_json_string is not None:
try:
config_from_env = orjson.loads(env_json_string)
except orjson.JSONDecodeError as e:
raise RuntimeError(f"Failed to parse '{model_config}' as valid JSON string.") from e
-else:
-config_from_env = {}

-env_struct = bentoml_cattr.structure(config_from_env, cls)
-
if "generation_config" in attrs:
generation_config = attrs.pop("generation_config")
if not LazyType(DictStrAny).isinstance(generation_config):
raise RuntimeError(f"Expected a dictionary, but got {type(generation_config)}")
else:
generation_config = {
-k: v for k, v in attrs.items() if k in attr.fields_dict(env_struct.__openllm_generation_class__)
+k: v for k, v in attrs.items() if k in attr.fields_dict(cls.__openllm_generation_class__)
}

for k in tuple(attrs.keys()):
if k in generation_config:
del attrs[k]

-return attr.evolve(env_struct, generation_config=generation_config, **attrs)
+config_from_env.update(attrs)
+config_from_env['generation_config'] = generation_config
+return bentoml_cattr.structure(config_from_env, cls)

def model_validate_click(self, **attrs: t.Any) -> tuple[LLMConfig, DictStrAny]:
"""Parse given click attributes into a LLMConfig and return the remaining click attributes."""
@@ -1779,7 +1781,12 @@ def peft_task_type(cls) -> str:

bentoml_cattr.register_unstructure_hook_factory(
lambda cls: lenient_issubclass(cls, LLMConfig),
-lambda cls: make_dict_unstructure_fn(cls, bentoml_cattr, _cattrs_omit_if_default=False, _cattrs_use_linecache=True, ),
+lambda cls: make_dict_unstructure_fn(
+cls,
+bentoml_cattr,
+_cattrs_omit_if_default=False,
+_cattrs_use_linecache=True,
+),
)


@@ -1796,20 +1803,19 @@ def structure_llm_config(data: DictStrAny, cls: type[LLMConfig]) -> LLMConfig:
if not LazyType(DictStrAny).isinstance(data):
raise RuntimeError(f"Expected a dictionary, but got {type(data)}")

-generation_cls_fields = attr.fields_dict(cls.__openllm_generation_class__)
cls_attrs = {
-k: v for k, v in data.items() if k in cls.__openllm_accepted_keys__ and k not in generation_cls_fields
+k: v for k, v in data.items() if k in cls.__openllm_accepted_keys__
}
+generation_cls_fields = attr.fields_dict(cls.__openllm_generation_class__)
if "generation_config" in data:
generation_config = data.pop("generation_config")
if not LazyType(DictStrAny).isinstance(generation_config):
raise RuntimeError(f"Expected a dictionary, but got {type(generation_config)}")
config_merger.merge(generation_config, {k: v for k, v in data.items() if k in generation_cls_fields})
else:
generation_config = {k: v for k, v in data.items() if k in generation_cls_fields}
-not_extras = list(cls_attrs) + list(generation_config)
# The rest should be passed to extras
-data = {k: v for k, v in data.items() if k not in not_extras}
+data = {k: v for k, v in data.items() if k not in cls.__openllm_accepted_keys__}

return cls(generation_config=generation_config, __openllm_extras__=data, **cls_attrs)

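Taken together, `model_construct_json` gives configs a JSON round-trip next to the existing `model_dump_json`, and `model_construct_env` now builds the final config in a single cattrs `structure` pass instead of `attr.evolve`. A minimal usage sketch, assuming a concrete subclass such as `FlanT5Config` is exported and that `temperature` is one of its generation-config fields (both names are illustrative):

```python
import openllm

# Generation fields may be passed at the top level; __init__ routes them
# into generation_config (see the __init__ hunk above).
config = openllm.FlanT5Config(temperature=0.75)

payload = config.model_dump_json()  # orjson-serialized bytes

# New in this commit: rebuild a config from JSON. Malformed input now
# surfaces as openllm.exceptions.ValidationError rather than a raw
# orjson.JSONDecodeError.
restored = openllm.FlanT5Config.model_construct_json(payload)
assert restored.generation_config.temperature == 0.75
```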