4 changes: 2 additions & 2 deletions README.md
@@ -14,8 +14,8 @@ Welcome to Instill Python SDK, where the world of AI-first application comes alive

Before you jump into creating your first application with this SDK tool, we recommend you to get familiar with the core concepts of Instill Product first. You can check out our documentation here:

- [Instill Core](http://instill.tech/docs/core/v1.0.0/welcome)
- [Instill SDK](http://instill.tech/docs/sdk/v1.0.0/welcome)
- [Instill Core](https://www.instill.tech/docs/latest/core/concepts)
- [Instill SDK](https://www.instill.tech/docs/latest/sdk/python)

## Setup

15 changes: 15 additions & 0 deletions instill/helpers/Dockerfile
@@ -0,0 +1,15 @@
ARG RAY_VERSION
ARG PYTHON_VERSION
ARG CUDA_SUFFIX

FROM rayproject/ray:${RAY_VERSION}-py${PYTHON_VERSION}${CUDA_SUFFIX}

RUN sudo apt-get update && sudo apt-get install curl -y

ARG PACKAGES
RUN for package in ${PACKAGES}; do \
pip install --default-timeout=1000 --no-cache-dir $package; \
done;

WORKDIR /home/ray
COPY . .
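
For reference, the image this Dockerfile describes could also be built by hand with explicit build args; the `build.py` helper added below does the same thing programmatically. The version numbers, package list, and tag here are purely illustrative:

```bash
# Illustrative manual build; CUDA_SUFFIX is only needed for GPU images.
docker build \
  --build-arg RAY_VERSION=2.9.3 \
  --build-arg PYTHON_VERSION=310 \
  --build-arg CUDA_SUFFIX=-cu121 \
  --build-arg PACKAGES="transformers==4.36.2" \
  -t admin/tinyllama:0.1.0 .
```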
65 changes: 65 additions & 0 deletions instill/helpers/build.py
@@ -0,0 +1,65 @@
import os
import shutil

import docker
import ray
import yaml

import instill
from instill.helpers.const import DEFAULT_DEPENDENCIES
from instill.utils.logger import Logger

if __name__ == "__main__":
Logger.i("[Instill Builder] Setup docker...")
client = docker.from_env()
shutil.copyfile(
__file__.replace("build.py", "Dockerfile"), os.getcwd() + "/Dockerfile"
)

try:
Logger.i("[Instill Builder] Loading config file...")
with open("instill.yaml", "r", encoding="utf8") as f:
Logger.i("[Instill Builder] Parsing config file...")
config = yaml.safe_load(f)

build = config["build"]
repo = config["repo"]
tag = config["tag"]

python_version = build["python_version"].replace(".", "")
ray_version = ray.__version__
instill_version = instill.__version__

cuda_suffix = "" if not build["gpu"] else "-cu121"

packages_str = ""
if not build["python_packages"] is None:
for p in build["python_packages"]:
packages_str += p + " "
for p in DEFAULT_DEPENDENCIES:
packages_str += p + " "
packages_str += f"instill-sdk=={instill_version}"

Logger.i("[Instill Builder] Building model image...")
img, logs = client.images.build(
path="./",
rm=True,
nocache=True,
forcerm=True,
tag=f"{repo}:{tag}",
buildargs={
"RAY_VERSION": ray_version,
"PYTHON_VERSION": python_version,
"PACKAGES": packages_str,
},
quiet=False,
)
for line in logs:
print(*line.values())
Logger.i(f"[Instill Builder] {repo}:{tag} built")
except Exception as e:
Logger.e("[Instill Builder] Build failed")
Logger.e(e)
finally:
os.remove("Dockerfile")
Logger.i("[Instill Builder] Done")
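
For context, the script above expects an `instill.yaml` in the working directory that provides `repo`, `tag`, and a `build` section. A minimal sketch of such a file, with placeholder values only, might look like:

```yaml
# Sketch of the keys build.py reads; the values are illustrative, not SDK defaults.
repo: admin/tinyllama          # image repository, combined with tag as <repo>:<tag>
tag: 0.1.0                     # image tag
build:
  gpu: false                   # selects the CUDA-suffixed Ray base image when true
  python_version: "3.10"       # dots are stripped before it reaches the Dockerfile
  python_packages:             # extra pip packages installed on top of DEFAULT_DEPENDENCIES
    - transformers==4.36.2
    - torch==2.1.2
```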
2 changes: 2 additions & 0 deletions instill/helpers/const.py
@@ -118,3 +118,5 @@ class VisualQuestionAnsweringInput:
"llama2-7b": 0.4,
"zephyr-7b": 0.4,
}

DEFAULT_DEPENDENCIES = ["protobuf==4.25.3", "grpcio-tools==1.62.0"]
56 changes: 56 additions & 0 deletions instill/helpers/push.py
@@ -0,0 +1,56 @@
import argparse
import types

import docker
import yaml

from instill.utils.logger import Logger

if __name__ == "__main__":
Logger.i("[Instill Builder] Setup docker...")
client = docker.from_env()

parser = argparse.ArgumentParser()
parser.add_argument(
"-u",
"--url",
help="image registry url, in the format of host:port, default to docker.io",
default="docker.io",
required=False,
)

try:
args = parser.parse_args()

Logger.i("[Instill Builder] Loading config file...")
with open("instill.yaml", "r", encoding="utf8") as f:
Logger.i("[Instill Builder] Parsing config file...")
config = yaml.safe_load(f)

registry = args.url
repo = config["repo"]
tag = config["tag"]

img = client.images.get(name=f"{repo}:{tag}")
img.tag(f"{registry}/{repo}", tag)
Logger.i("[Instill Builder] Pushing model image...")
logs = client.images.push(f"{registry}/{repo}", tag=tag)
if isinstance(logs, types.GeneratorType):
for line in logs:
print(*line.values())
elif isinstance(logs, list):
for line in logs:
if "errorDetail" in line:
raise RuntimeError(line["errorDetail"]["message"])
print(line)
else:
if "errorDetail" in logs:
err = logs.split('{"errorDetail":{"message":', 1)[1][1:-4]
raise RuntimeError(err)
print(logs)
Logger.i(f"[Instill Builder] {registry}/{repo}:{tag} pushed")
except Exception as e:
Logger.e("[Instill Builder] Push failed")
Logger.e(e)
finally:
Logger.i("[Instill Builder] Done")
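
Taken together with `build.py`, a plausible end-to-end flow is sketched below, assuming the helpers are invoked directly as modules from the model folder containing `instill.yaml` (the PR does not show a dedicated CLI entry point, and the registry address is illustrative):

```bash
# Build the image described by ./instill.yaml, then push it to a registry.
# --url defaults to docker.io when omitted.
python -m instill.helpers.build
python -m instill.helpers.push --url localhost:5000
```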
68 changes: 47 additions & 21 deletions instill/helpers/ray_config.py
@@ -1,5 +1,6 @@
import os
from typing import Callable, Optional
from warnings import warn

import ray
from ray import serve
@@ -25,32 +26,40 @@ class InstillDeployable:
def __init__(
self,
deployable: Deployment,
model_weight_or_folder_name: str,
use_gpu: bool,
) -> None:
self._deployment: Deployment = deployable
self.use_gpu = use_gpu
# params
self.model_weight_or_folder_name: str = model_weight_or_folder_name
if use_gpu:
self._update_num_cpus(0.25)
self._update_num_gpus(0.2)
self.update_num_cpus(0.25)
self.update_num_gpus(0.2)
else:
self._update_num_cpus(0.25)
self.update_num_cpus(0.25)

def _update_num_cpus(self, num_cpus: float):
def update_num_cpus(self, num_cpus: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"num_cpus": num_cpus})

def _update_memory(self, memory: float):
return self

def update_memory(self, memory: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"memory": memory})

def _update_num_gpus(self, num_gpus: float):
return self

def update_num_gpus(self, num_gpus: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"num_gpus": num_gpus})

return self

def _determine_vram_usage(self, model_path: str, total_vram: str):
warn(
"determine vram usage base on file size will soon be removed",
PendingDeprecationWarning,
)
if total_vram == "":
return 0.25
if os.path.isfile(model_path):
Expand All @@ -76,6 +85,10 @@ def _determine_vram_usage(self, model_path: str, total_vram: str):
raise ModelPathException

def _determine_ram_usage(self, model_path: str):
warn(
"determine ram usage base on file size will soon be removed",
PendingDeprecationWarning,
)
if os.path.isfile(model_path):
return max(
RAM_MINIMUM_RESERVE * (1024 * 1024 * 1024),
@@ -95,52 +108,65 @@ def update_min_replicas(self, num_replicas: int):
autoscaling_config=new_autoscaling_config
)

return self

def update_max_replicas(self, num_replicas: int):
new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG
new_autoscaling_config["max_replicas"] = num_replicas
self._deployment = self._deployment.options(
autoscaling_config=new_autoscaling_config
)

return self

def get_deployment_handle(self):
return self._deployment.bind()

def deploy(self, model_folder_path: str, ray_addr: str, total_vram: str):
warn(
"Deploy/Undeploy will soon be remove from the scope of SDK",
PendingDeprecationWarning,
)
if not ray.is_initialized():
ray_addr = "ray://" + ray_addr.replace("9000", "10001")
ray.init(address=ray_addr, runtime_env=DEFAULT_RUNTIME_ENV)

# /model-repository/{owner_type}/{owner_uid}/{model_id}/{weight}
model_path = "/".join([model_folder_path, self.model_weight_or_folder_name])
model_path_string_parts = model_path.split("/")
application_name = "_".join(model_path_string_parts[3:5])
# /model-repository/{owner_type}/{owner_uid}/{model_id}
model_path_string_parts = model_folder_path.split("/")
application_name = "_".join(model_path_string_parts[3:])
model_name = application_name.split("_")[1]

if self.use_gpu:
if model_name in MODEL_VRAM_OVERRIDE_LIST:
self._update_num_gpus(MODEL_VRAM_OVERRIDE_LIST[model_name])
self.update_num_gpus(MODEL_VRAM_OVERRIDE_LIST[model_name])
else:
self._update_num_gpus(
self._determine_vram_usage(model_path, total_vram)
self.update_num_gpus(
self._determine_vram_usage(model_folder_path, total_vram)
)
else:
self._update_memory(self._determine_ram_usage(model_path))
self.update_memory(self._determine_ram_usage(model_folder_path))

if model_name in MODEL_VRAM_OVERRIDE_LIST:
self.update_min_replicas(1)
self.update_max_replicas(1)

serve.run(
self._deployment.options(name=model_name).bind(model_path),
self._deployment.options(name=model_name).bind(),
name=application_name,
route_prefix=f"/{application_name}",
)

def undeploy(self, model_folder_path: str, ray_addr: str):
warn(
"Deploy/Undeploy will soon be remove from the scope of SDK",
PendingDeprecationWarning,
)
if not ray.is_initialized():
ray_addr = "ray://" + ray_addr.replace("9000", "10001")
ray.init(address=ray_addr, runtime_env=DEFAULT_RUNTIME_ENV)
# /model-repository/{owner_type}/{owner_uid}/{model_id}/{weight}
model_path = "/".join([model_folder_path, self.model_weight_or_folder_name])
model_path_string_parts = model_path.split("/")
application_name = "_".join(model_path_string_parts[3:5])
# /model-repository/{owner_type}/{owner_uid}/{model_id}
model_path_string_parts = model_folder_path.split("/")
application_name = "_".join(model_path_string_parts[3:])
serve.delete(application_name)

def __call__(self):
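
Because the renamed `update_*` methods now return `self`, resource options can be chained fluently before binding the deployment. A minimal sketch, assuming the SDK is installed and using a trivial stand-in Ray Serve deployment in place of a real model class (the numbers are illustrative, not recommended values):

```python
from ray import serve

from instill.helpers.ray_config import InstillDeployable


@serve.deployment
class TinyLlama:  # stand-in for a real model deployment class
    def __call__(self, prompt: str) -> str:
        return prompt


# Chain the fluent update_* methods, then bind the deployment handle.
deployable = (
    InstillDeployable(TinyLlama, use_gpu=True)
    .update_num_cpus(1)
    .update_num_gpus(0.5)
    .update_max_replicas(2)
)
entrypoint = deployable.get_deployment_handle()
```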
32 changes: 15 additions & 17 deletions notebooks/serve_custom_chat_model.ipynb
@@ -11,15 +11,13 @@
"1. First we need to create a file structure like the following\n",
"\n",
"```bash\n",
".\n",
"├── README.md\n",
"└── tiny_llama <=== your model name\n",
" └── 1 <=== your model version\n",
" ├── model.py <=== your model file\n",
" └── tinyllama <=== model weights and dependecy folder clone from huggingface (remember to follow the LICENSE of each model)\n",
". <=== your model folder\n",
"├── README.md <=== your model README\n",
"├── model.py <=== your model file\n",
"└── tinyllama <=== model weights and dependecy folder clone from huggingface (remember to follow the LICENSE of each model)\n",
"```\n",
"\n",
"Within the `README.md` you will have to put in the info about the model inbetween the `---` section, and a brief intro down below. For example\n",
"Within the `README.md` you will have to put in the info about the model in-between the `---` section, and a brief intro down below. For example\n",
"```\n",
"---\n",
"Task: TextGenerationChat\n",
@@ -28,7 +26,7 @@
" - TinyLlama-1.1B-Chat\n",
"---\n",
"\n",
"Learn more about it [here](https://www.instill.tech/docs/v0.9.0-beta/model/prepare#model-card-metadata)\n",
"Learn more about it [here](https://www.instill.tech/docs/latest/model/prepare#model-card-metadata)\n",
"\n",
"# Model-TinyLlama-1.1b-chat-dvc\n",
"\n",
@@ -40,7 +38,7 @@
"git lfs install\n",
"git clone https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0 $PROJECT_ROOT/{modelname}/{version}/tinyllama\n",
"```\n",
"3. Next, we start writting our model file, which with the help of the SDK, is relatively similar to what you would expect when developing in your local environment."
"3. Next, we start writing our model file, which with the help of the SDK, is relatively similar to what you would expect when developing in your local environment."
]
},
{
@@ -74,12 +72,12 @@
"class TinyLlama:\n",
" # within the __init__ function, setup the model instance with the desired framework, in this\n",
" # case is the pipeline from transformers\n",
" def __init__(self, model_path: str):\n",
" def __init__(self):\n",
" self.pipeline = pipeline(\n",
" \"text-generation\",\n",
" model=model_path,\n",
" torch_dtype=torch.float32,\n",
" device_map=\"cpu\",\n",
" model=\"tinyllama\",\n",
" torch_dtype=torch.bfloat16,\n",
" device_map=\"auto\",\n",
" )\n",
"\n",
" # ModelMetadata tells the server what inputs and outputs the model is expecting\n",
@@ -211,17 +209,17 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"5. Finally, we can pack it up and serve it on `Instill Model`! Simply\n",
"5. Finally, we can pack it up and serve it on `Instill Core`! Simply\n",
"```bash\n",
"zip -r \"tiny-llama.zip\" .\n",
"```\n",
"Or alternatively, if you have a LFS server or DVC bucket setup somewhere, you can also push the files along with the `.dvc` or lfs files onto github, and use our github import.\n",
"\n",
"Now go to `Model Hub` page on Instill console and create a model from local with this zip, and profit!\n",
"Now go to `Model` page on Instill console and create a model from local with this zip, and profit!\n",
"\n",
"Here is a sample request and response with this model\n",
"\n",
"_*req:*_\n",
"_req:_\n",
"```bash\n",
"curl --location 'http://localhost:8080/model/v1alpha/users/admin/models/tinyllama/trigger' \\\n",
"--header 'Content-Type: application/json' \\\n",
Expand All @@ -243,7 +241,7 @@
" ]\n",
"}'\n",
"```\n",
"_*resp:*_\n",
"_resp:_\n",
"```json\n",
"{\n",
" \"task\": \"TASK_TEXT_GENERATION_CHAT\",\n",