diff --git a/README.md b/README.md
index 9769c51..b3325f9 100644
--- a/README.md
+++ b/README.md
@@ -14,8 +14,8 @@ Welcome to Instill Python SDK, where the world of AI-first application comes ali
 
 Before you jump into creating your first application with this SDK tool, we recommend you to get familiar with the core concepts of Instill Product first. You can check out our documentation here:
 
-- [Instill Core](http://instill.tech/docs/core/v1.0.0/welcome)
-- [Instill SDK](http://instill.tech/docs/sdk/v1.0.0/welcome)
+- [Instill Core](https://www.instill.tech/docs/latest/core/concepts)
+- [Instill SDK](https://www.instill.tech/docs/latest/sdk/python)
 
 ## Setup
 
diff --git a/instill/helpers/Dockerfile b/instill/helpers/Dockerfile
new file mode 100644
index 0000000..9d487d9
--- /dev/null
+++ b/instill/helpers/Dockerfile
@@ -0,0 +1,15 @@
+ARG RAY_VERSION
+ARG PYTHON_VERSION
+ARG CUDA_SUFFIX
+
+FROM rayproject/ray:${RAY_VERSION}-py${PYTHON_VERSION}${CUDA_SUFFIX}
+
+RUN sudo apt-get update && sudo apt-get install curl -y
+
+ARG PACKAGES
+RUN for package in ${PACKAGES}; do \
+    pip install --default-timeout=1000 --no-cache-dir $package; \
+    done;
+
+WORKDIR /home/ray
+COPY . .
diff --git a/instill/helpers/build.py b/instill/helpers/build.py
new file mode 100644
index 0000000..f7a1cca
--- /dev/null
+++ b/instill/helpers/build.py
@@ -0,0 +1,66 @@
+import os
+import shutil
+
+import docker
+import ray
+import yaml
+
+import instill
+from instill.helpers.const import DEFAULT_DEPENDENCIES
+from instill.utils.logger import Logger
+
+if __name__ == "__main__":
+    Logger.i("[Instill Builder] Setup docker...")
+    client = docker.from_env()
+    shutil.copyfile(
+        __file__.replace("build.py", "Dockerfile"), os.getcwd() + "/Dockerfile"
+    )
+
+    try:
+        Logger.i("[Instill Builder] Loading config file...")
+        with open("instill.yaml", "r", encoding="utf8") as f:
+            Logger.i("[Instill Builder] Parsing config file...")
+            config = yaml.safe_load(f)
+
+            build = config["build"]
+            repo = config["repo"]
+            tag = config["tag"]
+
+            python_version = build["python_version"].replace(".", "")
+            ray_version = ray.__version__
+            instill_version = instill.__version__
+
+            cuda_suffix = "" if not build["gpu"] else "-cu121"
+
+            packages_str = ""
+            if build["python_packages"] is not None:
+                for p in build["python_packages"]:
+                    packages_str += p + " "
+            for p in DEFAULT_DEPENDENCIES:
+                packages_str += p + " "
+            packages_str += f"instill-sdk=={instill_version}"
+
+            Logger.i("[Instill Builder] Building model image...")
+            img, logs = client.images.build(
+                path="./",
+                rm=True,
+                nocache=True,
+                forcerm=True,
+                tag=f"{repo}:{tag}",
+                buildargs={
+                    "RAY_VERSION": ray_version,
+                    "PYTHON_VERSION": python_version,
+                    "CUDA_SUFFIX": cuda_suffix,
+                    "PACKAGES": packages_str,
+                },
+                quiet=False,
+            )
+            for line in logs:
+                print(*line.values())
+            Logger.i(f"[Instill Builder] {repo}:{tag} built")
+    except Exception as e:
+        Logger.e("[Instill Builder] Build failed")
+        Logger.e(e)
+    finally:
+        os.remove("Dockerfile")
+        Logger.i("[Instill Builder] Done")
diff --git a/instill/helpers/const.py b/instill/helpers/const.py
index 08d8849..9fb3da5 100644
--- a/instill/helpers/const.py
+++ b/instill/helpers/const.py
@@ -118,3 +118,5 @@ class VisualQuestionAnsweringInput:
     "llama2-7b": 0.4,
     "zephyr-7b": 0.4,
 }
+
+DEFAULT_DEPENDENCIES = ["protobuf==4.25.3", "grpcio-tools==1.62.0"]
diff --git a/instill/helpers/push.py b/instill/helpers/push.py
new file mode 100644
index 0000000..62cae6d
--- /dev/null
+++ b/instill/helpers/push.py
@@ -0,0 +1,56 @@
+import argparse
+import types
+
+import docker
+import yaml
+
+from instill.utils.logger import Logger
+
+if __name__ == "__main__":
+    Logger.i("[Instill Builder] Setup docker...")
+    client = docker.from_env()
+
+    parser = argparse.ArgumentParser()
+    parser.add_argument(
+        "-u",
+        "--url",
+        help="image registry url, in the format of host:port, defaults to docker.io",
+        default="docker.io",
+        required=False,
+    )
+
+    try:
+        args = parser.parse_args()
+
+        Logger.i("[Instill Builder] Loading config file...")
+        with open("instill.yaml", "r", encoding="utf8") as f:
+            Logger.i("[Instill Builder] Parsing config file...")
+            config = yaml.safe_load(f)
+
+            registry = args.url
+            repo = config["repo"]
+            tag = config["tag"]
+
+            img = client.images.get(name=f"{repo}:{tag}")
+            img.tag(f"{registry}/{repo}", tag)
+            Logger.i("[Instill Builder] Pushing model image...")
+            logs = client.images.push(f"{registry}/{repo}", tag=tag)
+            if isinstance(logs, types.GeneratorType):
+                for line in logs:
+                    print(*line.values())
+            elif isinstance(logs, list):
+                for line in logs:
+                    if "errorDetail" in line:
+                        raise RuntimeError(line["errorDetail"]["message"])
+                    print(line)
+            else:
+                if "errorDetail" in logs:
+                    err = logs.split('{"errorDetail":{"message":', 1)[1][1:-4]
+                    raise RuntimeError(err)
+                print(logs)
+            Logger.i(f"[Instill Builder] {registry}/{repo}:{tag} pushed")
+    except Exception as e:
+        Logger.e("[Instill Builder] Push failed")
+        Logger.e(e)
+    finally:
+        Logger.i("[Instill Builder] Done")
diff --git a/instill/helpers/ray_config.py b/instill/helpers/ray_config.py
index 4d1b670..b8348ea 100644
--- a/instill/helpers/ray_config.py
+++ b/instill/helpers/ray_config.py
@@ -1,5 +1,6 @@
 import os
 from typing import Callable, Optional
+from warnings import warn
 
 import ray
 from ray import serve
@@ -25,32 +26,40 @@ class InstillDeployable:
     def __init__(
         self,
         deployable: Deployment,
-        model_weight_or_folder_name: str,
         use_gpu: bool,
     ) -> None:
         self._deployment: Deployment = deployable
         self.use_gpu = use_gpu
         # params
-        self.model_weight_or_folder_name: str = model_weight_or_folder_name
         if use_gpu:
-            self._update_num_cpus(0.25)
-            self._update_num_gpus(0.2)
+            self.update_num_cpus(0.25)
+            self.update_num_gpus(0.2)
         else:
-            self._update_num_cpus(0.25)
+            self.update_num_cpus(0.25)
 
-    def _update_num_cpus(self, num_cpus: float):
+    def update_num_cpus(self, num_cpus: float):
         if self._deployment.ray_actor_options is not None:
             self._deployment.ray_actor_options.update({"num_cpus": num_cpus})
 
-    def _update_memory(self, memory: float):
+        return self
+
+    def update_memory(self, memory: float):
         if self._deployment.ray_actor_options is not None:
             self._deployment.ray_actor_options.update({"memory": memory})
 
-    def _update_num_gpus(self, num_gpus: float):
+        return self
+
+    def update_num_gpus(self, num_gpus: float):
         if self._deployment.ray_actor_options is not None:
             self._deployment.ray_actor_options.update({"num_gpus": num_gpus})
 
+        return self
+
     def _determine_vram_usage(self, model_path: str, total_vram: str):
+        warn(
+            "determine vram usage based on file size will soon be removed",
+            PendingDeprecationWarning,
+        )
         if total_vram == "":
             return 0.25
         if os.path.isfile(model_path):
@@ -76,6 +85,10 @@ def _determine_vram_usage(self, model_path: str, total_vram: str):
             raise ModelPathException
 
     def _determine_ram_usage(self, model_path: str):
+        warn(
+            "determine ram usage based on file size will soon be removed",
+            PendingDeprecationWarning,
+        )
         if os.path.isfile(model_path):
             return max(
                 RAM_MINIMUM_RESERVE * (1024 * 1024 * 1024),
@@ -95,6 +108,8 @@ def update_min_replicas(self, num_replicas: int):
             autoscaling_config=new_autoscaling_config
         )
 
+        return self
+
     def update_max_replicas(self, num_replicas: int):
         new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG
         new_autoscaling_config["max_replicas"] = num_replicas
@@ -102,45 +117,56 @@
             autoscaling_config=new_autoscaling_config
         )
 
+        return self
+
+    def get_deployment_handle(self):
+        return self._deployment.bind()
+
     def deploy(self, model_folder_path: str, ray_addr: str, total_vram: str):
+        warn(
+            "Deploy/Undeploy will soon be removed from the scope of the SDK",
+            PendingDeprecationWarning,
+        )
         if not ray.is_initialized():
             ray_addr = "ray://" + ray_addr.replace("9000", "10001")
             ray.init(address=ray_addr, runtime_env=DEFAULT_RUNTIME_ENV)
 
-        # /model-repository/{owner_type}/{owner_uid}/{model_id}/{weight}
-        model_path = "/".join([model_folder_path, self.model_weight_or_folder_name])
-        model_path_string_parts = model_path.split("/")
-        application_name = "_".join(model_path_string_parts[3:5])
+        # /model-repository/{owner_type}/{owner_uid}/{model_id}
+        model_path_string_parts = model_folder_path.split("/")
+        application_name = "_".join(model_path_string_parts[3:])
         model_name = application_name.split("_")[1]
         if self.use_gpu:
             if model_name in MODEL_VRAM_OVERRIDE_LIST:
-                self._update_num_gpus(MODEL_VRAM_OVERRIDE_LIST[model_name])
+                self.update_num_gpus(MODEL_VRAM_OVERRIDE_LIST[model_name])
             else:
-                self._update_num_gpus(
-                    self._determine_vram_usage(model_path, total_vram)
+                self.update_num_gpus(
+                    self._determine_vram_usage(model_folder_path, total_vram)
                 )
         else:
-            self._update_memory(self._determine_ram_usage(model_path))
+            self.update_memory(self._determine_ram_usage(model_folder_path))
 
         if model_name in MODEL_VRAM_OVERRIDE_LIST:
             self.update_min_replicas(1)
             self.update_max_replicas(1)
 
         serve.run(
-            self._deployment.options(name=model_name).bind(model_path),
+            self._deployment.options(name=model_name).bind(),
             name=application_name,
             route_prefix=f"/{application_name}",
         )
 
     def undeploy(self, model_folder_path: str, ray_addr: str):
+        warn(
+            "Deploy/Undeploy will soon be removed from the scope of the SDK",
+            PendingDeprecationWarning,
+        )
         if not ray.is_initialized():
             ray_addr = "ray://" + ray_addr.replace("9000", "10001")
             ray.init(address=ray_addr, runtime_env=DEFAULT_RUNTIME_ENV)
 
-        # /model-repository/{owner_type}/{owner_uid}/{model_id}/{weight}
-        model_path = "/".join([model_folder_path, self.model_weight_or_folder_name])
-        model_path_string_parts = model_path.split("/")
-        application_name = "_".join(model_path_string_parts[3:5])
+        # /model-repository/{owner_type}/{owner_uid}/{model_id}
+        model_path_string_parts = model_folder_path.split("/")
+        application_name = "_".join(model_path_string_parts[3:])
         serve.delete(application_name)
 
     def __call__(self):
diff --git a/notebooks/serve_custom_chat_model.ipynb b/notebooks/serve_custom_chat_model.ipynb
index 344f4a4..2d696fb 100644
--- a/notebooks/serve_custom_chat_model.ipynb
+++ b/notebooks/serve_custom_chat_model.ipynb
@@ -11,15 +11,13 @@
     "1. First we need to create a file structure like the following\n",
     "\n",
     "```bash\n",
-    ".\n",
-    "├── README.md\n",
-    "└── tiny_llama <=== your model name\n",
-    " └── 1 <=== your model version\n",
-    " ├── model.py <=== your model file\n",
-    " └── tinyllama <=== model weights and dependecy folder clone from huggingface (remember to follow the LICENSE of each model)\n",
+    ". <=== your model folder\n",
+    "├── README.md <=== your model README\n",
+    "├── model.py <=== your model file\n",
+    "└── tinyllama <=== model weights and dependency folder cloned from huggingface (remember to follow the LICENSE of each model)\n",
     "```\n",
     "\n",
-    "Within the `README.md` you will have to put in the info about the model inbetween the `---` section, and a brief intro down below. For example\n",
+    "Within the `README.md` you will have to put the info about the model between the `---` markers, and a brief intro down below. For example\n",
     "```\n",
     "---\n",
     "Task: TextGenerationChat\n",
@@ -28,7 +26,7 @@
     " - TinyLlama-1.1B-Chat\n",
     "---\n",
     "\n",
-    "Learn more about it [here](https://www.instill.tech/docs/v0.9.0-beta/model/prepare#model-card-metadata)\n",
+    "Learn more about it [here](https://www.instill.tech/docs/latest/model/prepare#model-card-metadata)\n",
     "\n",
     "# Model-TinyLlama-1.1b-chat-dvc\n",
     "\n",
@@ -40,7 +38,7 @@
     "git lfs install\n",
     "git clone https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0 $PROJECT_ROOT/{modelname}/{version}/tinyllama\n",
     "```\n",
-    "3. Next, we start writting our model file, which with the help of the SDK, is relatively similar to what you would expect when developing in your local environment."
+    "3. Next, we start writing our model file, which, with the help of the SDK, is relatively similar to what you would expect when developing in your local environment."
    ]
   },
   {
@@ -74,12 +72,12 @@
     "class TinyLlama:\n",
     "    # within the __init__ function, setup the model instance with the desired framework, in this\n",
     "    # case is the pipeline from transformers\n",
-    "    def __init__(self, model_path: str):\n",
+    "    def __init__(self):\n",
     "        self.pipeline = pipeline(\n",
     "            \"text-generation\",\n",
-    "            model=model_path,\n",
-    "            torch_dtype=torch.float32,\n",
-    "            device_map=\"cpu\",\n",
+    "            model=\"tinyllama\",\n",
+    "            torch_dtype=torch.bfloat16,\n",
+    "            device_map=\"auto\",\n",
     "        )\n",
     "\n",
     "    # ModelMetadata tells the server what inputs and outputs the model is expecting\n",
@@ -211,17 +209,17 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "5. Finally, we can pack it up and serve it on `Instill Model`! Simply\n",
+    "5. Finally, we can pack it up and serve it on `Instill Core`! Simply\n",
     "```bash\n",
     "zip -r \"tiny-llama.zip\" .\n",
     "```\n",
     "Or alternatively, if you have a LFS server or DVC bucket setup somewhere, you can also push the files along with the `.dvc` or lfs files onto github, and use our github import.\n",
     "\n",
-    "Now go to `Model Hub` page on Instill console and create a model from local with this zip, and profit!\n",
+    "Now go to `Model` page on Instill console and create a model from local with this zip, and profit!\n",
     "\n",
     "Here is a sample request and response with this model\n",
     "\n",
-    "_*req:*_\n",
+    "_req:_\n",
     "```bash\n",
     "curl --location 'http://localhost:8080/model/v1alpha/users/admin/models/tinyllama/trigger' \\\n",
     "--header 'Content-Type: application/json' \\\n",
@@ -243,7 +241,7 @@
     " ]\n",
     "}'\n",
     "```\n",
-    "_*resp:*_\n",
+    "_resp:_\n",
     "```json\n",
     "{\n",
     " \"task\": \"TASK_TEXT_GENERATION_CHAT\",\n",
diff --git a/poetry.lock b/poetry.lock
index 5e63456..53df211 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -233,8 +233,8 @@ files = [
 lazy-object-proxy = ">=1.4.0"
 typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
 wrapt = [
-    {version = ">=1.11,<2", markers = "python_version < \"3.11\""},
     {version = ">=1.14,<2", markers = "python_version >= \"3.11\""},
+    {version = ">=1.11,<2", markers = "python_version < \"3.11\""},
 ]
 
 [[package]]
@@ -747,10 +747,10 @@ isort = ">=4.3.21,<6.0"
 jinja2 = ">=2.10.1,<4.0"
 packaging = "*"
 pydantic = [
-    {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""},
-    {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
     {version = ">=1.10.0,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.12\" and python_version < \"4.0\""},
     {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
+    {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""},
+    {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
 ]
 pyyaml = ">=6.0.1"
 toml = {version = ">=0.10.0,<1.0.0", markers = "python_version < \"3.11\""}
@@ -849,6 +849,27 @@ idna = ["idna (>=3.6)"]
 trio = ["trio (>=0.23)"]
 wmi = ["wmi (>=1.5.1)"]
 
+[[package]]
+name = "docker"
+version = "7.0.0"
+description = "A Python library for the Docker Engine API."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "docker-7.0.0-py3-none-any.whl", hash = "sha256:12ba681f2777a0ad28ffbcc846a69c31b4dfd9752b47eb425a274ee269c5e14b"},
+    {file = "docker-7.0.0.tar.gz", hash = "sha256:323736fb92cd9418fc5e7133bc953e11a9da04f4483f828b527db553f1e7e5a3"},
+]
+
+[package.dependencies]
+packaging = ">=14.0"
+pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""}
+requests = ">=2.26.0"
+urllib3 = ">=1.26.0"
+
+[package.extras]
+ssh = ["paramiko (>=2.4.3)"]
+websockets = ["websocket-client (>=1.3.0)"]
+
 [[package]]
 name = "docopt"
 version = "0.6.2"
@@ -2931,8 +2952,8 @@ files = [
 astroid = ">=2.12.13,<=2.14.0-dev0"
 colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
 dill = [
-    {version = ">=0.2", markers = "python_version < \"3.11\""},
     {version = ">=0.3.6", markers = "python_version >= \"3.11\""},
+    {version = ">=0.2", markers = "python_version < \"3.11\""},
 ]
 isort = ">=4.2.5,<6"
 mccabe = ">=0.6,<0.8"
@@ -3331,8 +3352,8 @@ filelock = "*"
 frozenlist = "*"
 gpustat = {version = ">=1.0.0", optional = true, markers = "extra == \"serve\""}
 grpcio = [
-    {version = ">=1.32.0", optional = true, markers = "python_version < \"3.10\" and extra == \"serve\""},
     {version = ">=1.42.0", optional = true, markers = "python_version >= \"3.10\" and extra == \"serve\""},
+    {version = ">=1.32.0", optional = true, markers = "python_version < \"3.10\" and extra == \"serve\""},
 ]
 jsonschema = "*"
 msgpack = ">=1.0.0,<2.0.0"
@@ -4386,4 +4407,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8,<3.13"
-content-hash = "9b0fb37caa5909e15182238cd320e00b140c158b050a84dc58c2da06d314bf7e"
+content-hash = "ad099f4430b73989c2b52b691f19f1b53e5fc5ff0fe7ab4c03a9aab23fe39f65"
diff --git a/pyproject.toml b/pyproject.toml
index ddb4e06..e9778a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,6 +39,7 @@ pydantic = ">=1.10.13"
 pillow = "^10.1.0"
 ray = {version = "2.9.3", extras = ["serve"]}
 jsonschema = "^4.20.0"
+docker = "^7.0.0"
 
 [tool.poetry.dev-dependencies]
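For context on how the new helpers are driven: `build.py` expects an `instill.yaml` next to the model code and reads `repo`, `tag`, and a `build` section containing `gpu`, `python_version`, and `python_packages`, while `push.py` reads the same file plus an optional `--url` registry flag. The sketch below is a hypothetical config assembled from those keys; the field names come from the code in this change, but the concrete values (repository name, package list) are illustrative placeholders only.

```yaml
# Hypothetical instill.yaml sketch for the new build/push helpers.
# Field names mirror what build.py reads; the values are placeholders.
build:
  gpu: false              # when true, the CUDA-suffixed Ray base image is used
  python_version: "3.10"  # dots are stripped to form the rayproject/ray tag suffix (py310)
  python_packages:        # extra pip packages baked into the image; may be null
    - transformers
    - torch
repo: admin/tinyllama     # image name, tagged as {repo}:{tag}
tag: v1
```

With that file in the working directory, the model image can be built and pushed by running the two scripts from the model folder, for example `python -m instill.helpers.build` followed by `python -m instill.helpers.push -u <registry:port>` (the `-m` invocation is an assumption; both scripts only require `instill.yaml` to be present in the current directory).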