From 18044b367d69ee549cf7fdd79e55c0d3dfe0bd4a Mon Sep 17 00:00:00 2001
From: HR Wu <5631010+heiruwu@users.noreply.github.com>
Date: Fri, 1 Mar 2024 19:48:52 +0800
Subject: [PATCH 1/8] feat(ray): add deployment handle return for ray CLI (#107)

Because

- `model-backend` needs the `Ray` CLI to deploy dockerized applications

This commit

- return the deployment handle for the CLI to reference

---
 instill/helpers/ray_config.py | 38 +++++++++++++++++++++++------------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/instill/helpers/ray_config.py b/instill/helpers/ray_config.py
index 4d1b670..f28753b 100644
--- a/instill/helpers/ray_config.py
+++ b/instill/helpers/ray_config.py
@@ -1,5 +1,6 @@
 import os
 from typing import Callable, Optional
+from warnings import warn
 
 import ray
 from ray import serve
@@ -25,13 +26,11 @@ class InstillDeployable:
     def __init__(
         self,
         deployable: Deployment,
-        model_weight_or_folder_name: str,
         use_gpu: bool,
     ) -> None:
         self._deployment: Deployment = deployable
         self.use_gpu = use_gpu
         # params
-        self.model_weight_or_folder_name: str = model_weight_or_folder_name
         if use_gpu:
             self._update_num_cpus(0.25)
             self._update_num_gpus(0.2)
@@ -95,6 +94,8 @@ def update_min_replicas(self, num_replicas: int):
             autoscaling_config=new_autoscaling_config
         )
 
+        return self
+
     def update_max_replicas(self, num_replicas: int):
         new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG
         new_autoscaling_config["max_replicas"] = num_replicas
@@ -102,15 +103,23 @@ def update_max_replicas(self, num_replicas: int):
             autoscaling_config=new_autoscaling_config
         )
 
+        return self
+
+    def get_deployment_handle(self):
+        return self._deployment.bind()
+
     def deploy(self, model_folder_path: str, ray_addr: str, total_vram: str):
+        warn(
+            "Deploy/Undeploy will soon be removed from the scope of SDK",
+            PendingDeprecationWarning,
+        )
         if not ray.is_initialized():
             ray_addr = "ray://" + ray_addr.replace("9000", "10001")
             ray.init(address=ray_addr, runtime_env=DEFAULT_RUNTIME_ENV)
 
-        # /model-repository/{owner_type}/{owner_uid}/{model_id}/{weight}
-        model_path = "/".join([model_folder_path, self.model_weight_or_folder_name])
-        model_path_string_parts = model_path.split("/")
-        application_name = "_".join(model_path_string_parts[3:5])
+        # /model-repository/{owner_type}/{owner_uid}/{model_id}
+        model_path_string_parts = model_folder_path.split("/")
+        application_name = "_".join(model_path_string_parts[3:])
         model_name = application_name.split("_")[1]
 
         if self.use_gpu:
@@ -118,29 +127,32 @@ def deploy(self, model_folder_path: str, ray_addr: str, total_vram: str):
                 self._update_num_gpus(MODEL_VRAM_OVERRIDE_LIST[model_name])
             else:
                 self._update_num_gpus(
-                    self._determine_vram_usage(model_path, total_vram)
+                    self._determine_vram_usage(model_folder_path, total_vram)
                 )
         else:
-            self._update_memory(self._determine_ram_usage(model_path))
+            self._update_memory(self._determine_ram_usage(model_folder_path))
 
         if model_name in MODEL_VRAM_OVERRIDE_LIST:
             self.update_min_replicas(1)
             self.update_max_replicas(1)
 
         serve.run(
-            self._deployment.options(name=model_name).bind(model_path),
+            self._deployment.options(name=model_name).bind(),
             name=application_name,
             route_prefix=f"/{application_name}",
         )
 
     def undeploy(self, model_folder_path: str, ray_addr: str):
+        warn(
+            "Deploy/Undeploy will soon be removed from the scope of SDK",
+            PendingDeprecationWarning,
+        )
         if not ray.is_initialized():
             ray_addr = "ray://" + ray_addr.replace("9000", "10001")
             ray.init(address=ray_addr, runtime_env=DEFAULT_RUNTIME_ENV)
 
-        # /model-repository/{owner_type}/{owner_uid}/{model_id}/{weight}
-        model_path = "/".join([model_folder_path, self.model_weight_or_folder_name])
-        model_path_string_parts = model_path.split("/")
-        application_name = "_".join(model_path_string_parts[3:5])
+        # /model-repository/{owner_type}/{owner_uid}/{model_id}
+        model_path_string_parts = model_folder_path.split("/")
+        application_name = "_".join(model_path_string_parts[3:])
         serve.delete(application_name)
 
     def __call__(self):
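With this change a dockerized model exposes its bound deployment to the Ray CLI instead of relying on the SDK's own `serve.run()` call. A minimal `model.py` sketch, assuming the SDK's `instill_deployment` decorator and using illustrative names (`MyModel`, `entrypoint`), neither of which is defined by this patch:

```python
# model.py (sketch)
from instill.helpers.ray_config import InstillDeployable, instill_deployment


@instill_deployment  # assumed decorator that turns the class into a Ray Serve Deployment
class MyModel:
    def __init__(self):
        # load weights, tokenizer, etc. from the files shipped with the model
        ...

    async def __call__(self, request):
        # run inference and return the task output
        ...


# The Ray CLI driven by model-backend references this handle and deploys it,
# so the SDK no longer needs to call serve.run() itself.
entrypoint = InstillDeployable(MyModel, use_gpu=False).get_deployment_handle()
```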
From 7c42ae7bec22149cbdfb40a6ab523a11b411f94a Mon Sep 17 00:00:00 2001
From: HR Wu <5631010+heiruwu@users.noreply.github.com>
Date: Sat, 2 Mar 2024 04:38:58 +0800
Subject: [PATCH 2/8] feat(ray): support containerized model build and push (#108)

Because

- we need to provide an easy-to-use script for users to build and push containerized models to the desired registry

This commit

- add `docker` dependency
- add `build` module script for easy image building and pushing

---
 instill/helpers/Dockerfile | 15 +++++++++++
 instill/helpers/build.py | 55 ++++++++++++++++++++++++++++++++++++++
 instill/helpers/const.py | 2 ++
 poetry.lock | 33 ++++++++++++++++++-----
 pyproject.toml | 1 +
 5 files changed, 100 insertions(+), 6 deletions(-)
 create mode 100644 instill/helpers/Dockerfile
 create mode 100644 instill/helpers/build.py

diff --git a/instill/helpers/Dockerfile b/instill/helpers/Dockerfile
new file mode 100644
index 0000000..1a9b5fe
--- /dev/null
+++ b/instill/helpers/Dockerfile
@@ -0,0 +1,15 @@
+ARG RAY_VERSION
+ARG PYTHON_VERSION
+ARG CUDA_SUFFIX
+
+FROM rayproject/ray:${RAY_VERSION}-py${PYTHON_VERSION}${CUDA_SUFFIX}
+
+RUN sudo apt-get update && sudo apt-get install curl -y
+
+ARG PACKAGES
+RUN for package in ${PACKAGES}; do \
+    pip install $package; \
+    done;
+
+WORKDIR /home/ray
+COPY model.py ./model.py
diff --git a/instill/helpers/build.py b/instill/helpers/build.py
new file mode 100644
index 0000000..d5f277c
--- /dev/null
+++ b/instill/helpers/build.py
@@ -0,0 +1,55 @@
+import os
+import shutil
+
+import docker
+import ray
+import yaml
+
+import instill
+from instill.helpers.const import DEFAULT_DEPENDENCIES
+
+if __name__ == "__main__":
+    client = docker.from_env()
+    shutil.copyfile(
+        __file__.replace("build.py", "Dockerfile"), os.getcwd() + "/Dockerfile"
+    )
+
+    try:
+        with open("instill.yaml", "r", encoding="utf8") as f:
+            config = yaml.safe_load(f)
+
+        build = config["build"]
+        registry = config["registry"]
+        repo = config["repo"]
+        tag = config["tag"]
+
+        python_version = build["python_version"]
+        ray_version = ray.__version__
+        instill_version = instill.__version__
+
+        cuda_suffix = "" if not build["gpu"] else "-cu121"
+
+        packages_str = ""
+        for p in build["python_packages"]:
+            packages_str += p + " "
+        for p in DEFAULT_DEPENDENCIES:
+            packages_str += p + " "
+        packages_str += f"instill-sdk=={instill_version}"
+
+        img, _ = client.images.build(
+            path="./",
+            rm=True,
+            tag=f"{repo}:{tag}",
+            buildargs={
+                "RAY_VERSION": ray_version,
+                "PYTHON_VERSION": python_version,
+                "PACKAGES": packages_str,
+            },
+            quiet=False,
+        )
+        img.tag(f"{registry}/{repo}", tag)
+        client.images.push(f"{registry}/{repo}", tag=tag)
+    except Exception as e:
+        print(e)
+    finally:
+        os.remove("Dockerfile")
diff --git a/instill/helpers/const.py b/instill/helpers/const.py
index 08d8849..9fb3da5 100644
--- a/instill/helpers/const.py
+++ b/instill/helpers/const.py
@@ -118,3 +118,5 @@ class VisualQuestionAnsweringInput:
     "llama2-7b": 0.4,
     "zephyr-7b": 0.4,
 }
+
+DEFAULT_DEPENDENCIES = ["protobuf==4.25.3", "grpcio-tools==1.62.0"]
diff --git a/poetry.lock b/poetry.lock
index 5e63456..53df211 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -233,8 +233,8 @@ files = [
 lazy-object-proxy = ">=1.4.0"
 typing-extensions = {version = ">=4.0.0", markers = "python_version < \"3.11\""}
 wrapt = [
-    {version = ">=1.11,<2", markers = "python_version < \"3.11\""},
     {version = ">=1.14,<2", markers = "python_version >= \"3.11\""},
+    {version = ">=1.11,<2", markers = "python_version < \"3.11\""},
 ]
 
 [[package]]
@@ -747,10 +747,10 @@ isort = ">=4.3.21,<6.0"
 jinja2 = ">=2.10.1,<4.0"
 packaging = "*"
 pydantic = [
-    {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""},
-    {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
     {version = ">=1.10.0,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.12\" and python_version < \"4.0\""},
     {version = ">=1.10.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.11\" and python_version < \"3.12\""},
+    {version = ">=1.5.1,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version < \"3.10\""},
+    {version = ">=1.9.0,<2.4.0 || >2.4.0,<3.0", extras = ["email"], markers = "python_version >= \"3.10\" and python_version < \"3.11\""},
 ]
 pyyaml = ">=6.0.1"
 toml = {version = ">=0.10.0,<1.0.0", markers = "python_version < \"3.11\""}
@@ -849,6 +849,27 @@ idna = ["idna (>=3.6)"]
 trio = ["trio (>=0.23)"]
 wmi = ["wmi (>=1.5.1)"]
 
+[[package]]
+name = "docker"
+version = "7.0.0"
+description = "A Python library for the Docker Engine API."
+optional = false
+python-versions = ">=3.8"
+files = [
+    {file = "docker-7.0.0-py3-none-any.whl", hash = "sha256:12ba681f2777a0ad28ffbcc846a69c31b4dfd9752b47eb425a274ee269c5e14b"},
+    {file = "docker-7.0.0.tar.gz", hash = "sha256:323736fb92cd9418fc5e7133bc953e11a9da04f4483f828b527db553f1e7e5a3"},
+]
+
+[package.dependencies]
+packaging = ">=14.0"
+pywin32 = {version = ">=304", markers = "sys_platform == \"win32\""}
+requests = ">=2.26.0"
+urllib3 = ">=1.26.0"
+
+[package.extras]
+ssh = ["paramiko (>=2.4.3)"]
+websockets = ["websocket-client (>=1.3.0)"]
+
 [[package]]
 name = "docopt"
 version = "0.6.2"
@@ -2931,8 +2952,8 @@ files = [
 astroid = ">=2.12.13,<=2.14.0-dev0"
 colorama = {version = ">=0.4.5", markers = "sys_platform == \"win32\""}
 dill = [
-    {version = ">=0.2", markers = "python_version < \"3.11\""},
     {version = ">=0.3.6", markers = "python_version >= \"3.11\""},
+    {version = ">=0.2", markers = "python_version < \"3.11\""},
 ]
 isort = ">=4.2.5,<6"
 mccabe = ">=0.6,<0.8"
@@ -3331,8 +3352,8 @@ filelock = "*"
 frozenlist = "*"
 gpustat = {version = ">=1.0.0", optional = true, markers = "extra == \"serve\""}
 grpcio = [
-    {version = ">=1.32.0", optional = true, markers = "python_version < \"3.10\" and extra == \"serve\""},
     {version = ">=1.42.0", optional = true, markers = "python_version >= \"3.10\" and extra == \"serve\""},
+    {version = ">=1.32.0", optional = true, markers = "python_version < \"3.10\" and extra == \"serve\""},
 ]
 jsonschema = "*"
 msgpack = ">=1.0.0,<2.0.0"
@@ -4386,4 +4407,4 @@ testing = ["big-O", "jaraco.functools", "jaraco.itertools", "more-itertools", "p
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8,<3.13"
-content-hash = "9b0fb37caa5909e15182238cd320e00b140c158b050a84dc58c2da06d314bf7e"
+content-hash = "ad099f4430b73989c2b52b691f19f1b53e5fc5ff0fe7ab4c03a9aab23fe39f65"
diff --git a/pyproject.toml b/pyproject.toml
index ddb4e06..e9778a9 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -39,6 +39,7 @@ pydantic = ">=1.10.13"
 pillow = "^10.1.0"
 ray = {version = "2.9.3", extras = ["serve"]}
 jsonschema = "^4.20.0"
+docker = "^7.0.0"
 
 [tool.poetry.dev-dependencies]
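The `build` script above is driven entirely by an `instill.yaml` placed next to `model.py`. A sketch with illustrative values (the keys are the ones `build.py` reads; `registry` is dropped again in PATCH 8/8, which moves registry selection to the push step):

```yaml
# instill.yaml (illustrative values)
registry: docker.io       # target registry, prepended to the repo when tagging
repo: admin/tinyllama     # image repository name
tag: latest               # image tag
build:
  gpu: true               # selects the CUDA-enabled Ray base image suffix
  python_version: "3.10"  # must match an available rayproject/ray Python build
  python_packages:        # extra pip packages baked into the image
    - transformers
    - torch
```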
From db795c208150f99ca9a6eeefbab4b132b5325c94 Mon Sep 17 00:00:00 2001
From: HR Wu <5631010+heiruwu@users.noreply.github.com>
Date: Tue, 5 Mar 2024 03:19:32 +0800
Subject: [PATCH 3/8] fix(dockerfile): copy all files in the same model dir (#109)

Because

- we need to copy model weight files along with config and model.py

This commit

- update `dockerfile` to copy all files in the same directory

---
 instill/helpers/Dockerfile | 2 +-
 instill/helpers/build.py | 8 +++++---
 2 files changed, 6 insertions(+), 4 deletions(-)

diff --git a/instill/helpers/Dockerfile b/instill/helpers/Dockerfile
index 1a9b5fe..7ea801b 100644
--- a/instill/helpers/Dockerfile
+++ b/instill/helpers/Dockerfile
@@ -12,4 +12,4 @@ RUN for package in ${PACKAGES}; do \
     done;
 
 WORKDIR /home/ray
-COPY model.py ./model.py
+COPY . .
diff --git a/instill/helpers/build.py b/instill/helpers/build.py
index d5f277c..9fbee9a 100644
--- a/instill/helpers/build.py
+++ b/instill/helpers/build.py
@@ -23,15 +23,16 @@
         repo = config["repo"]
         tag = config["tag"]
 
-        python_version = build["python_version"]
+        python_version = build["python_version"].replace(".", "")
         ray_version = ray.__version__
         instill_version = instill.__version__
 
         cuda_suffix = "" if not build["gpu"] else "-cu121"
 
         packages_str = ""
-        for p in build["python_packages"]:
-            packages_str += p + " "
+        if not build["python_packages"] is None:
+            for p in build["python_packages"]:
+                packages_str += p + " "
         for p in DEFAULT_DEPENDENCIES:
             packages_str += p + " "
         packages_str += f"instill-sdk=={instill_version}"
 
         img, _ = client.images.build(
             path="./",
             rm=True,
+            nocache=True,
             tag=f"{repo}:{tag}",
             buildargs={
                 "RAY_VERSION": ray_version,
From c263c9b81afaf38601067581761c6f3bfc878cf1 Mon Sep 17 00:00:00 2001
From: HR Wu <5631010+heiruwu@users.noreply.github.com>
Date: Tue, 5 Mar 2024 15:50:41 +0800
Subject: [PATCH 4/8] feat(ray): expose cpu/gpu resource allocation config (#110)

Because

- It is not practical to determine vram usage solely from model file size

This commit

- expose cpu/gpu/ram resource allocation config to user

---
 instill/helpers/build.py | 13 ++++++++++++-
 instill/helpers/ray_config.py | 32 +++++++++++++++++++++++---------
 2 files changed, 35 insertions(+), 10 deletions(-)

diff --git a/instill/helpers/build.py b/instill/helpers/build.py
index 9fbee9a..cb21212 100644
--- a/instill/helpers/build.py
+++ b/instill/helpers/build.py
@@ -7,15 +7,19 @@
 
 import instill
 from instill.helpers.const import DEFAULT_DEPENDENCIES
+from instill.utils.logger import Logger
 
 if __name__ == "__main__":
+    Logger.i("[Instill Builder] Setup docker...")
     client = docker.from_env()
     shutil.copyfile(
         __file__.replace("build.py", "Dockerfile"), os.getcwd() + "/Dockerfile"
     )
 
     try:
+        Logger.i("[Instill Builder] Loading config file...")
         with open("instill.yaml", "r", encoding="utf8") as f:
+            Logger.i("[Instill Builder] Parsing config file...")
             config = yaml.safe_load(f)
 
         build = config["build"]
@@ -37,6 +41,7 @@
             packages_str += p + " "
         packages_str += f"instill-sdk=={instill_version}"
 
+        Logger.i("[Instill Builder] Building model image...")
         img, _ = client.images.build(
             path="./",
             rm=True,
@@ -49,9 +54,15 @@
             },
             quiet=False,
         )
+        Logger.i(f"[Instill Builder] {registry}/{repo}:{tag} built")
         img.tag(f"{registry}/{repo}", tag)
+        client.images.remove(f"{repo}:{tag}")
+        Logger.i("[Instill Builder] Pushing model image...")
         client.images.push(f"{registry}/{repo}", tag=tag)
+        Logger.i(f"[Instill Builder] {registry}/{repo}:{tag} pushed")
     except Exception as e:
-        print(e)
+        Logger.e("[Instill Builder] Build failed")
+        Logger.e(e)
     finally:
         os.remove("Dockerfile")
+        Logger.i("[Instill Builder] Build successful")
diff --git a/instill/helpers/ray_config.py b/instill/helpers/ray_config.py
index f28753b..b8348ea 100644
--- a/instill/helpers/ray_config.py
+++ b/instill/helpers/ray_config.py
@@ -32,24 +32,34 @@ def __init__(
         self.use_gpu = use_gpu
         # params
         if use_gpu:
-            self._update_num_cpus(0.25)
-            self._update_num_gpus(0.2)
+            self.update_num_cpus(0.25)
+            self.update_num_gpus(0.2)
         else:
-            self._update_num_cpus(0.25)
+            self.update_num_cpus(0.25)
 
-    def _update_num_cpus(self, num_cpus: float):
+    def update_num_cpus(self, num_cpus: float):
         if self._deployment.ray_actor_options is not None:
             self._deployment.ray_actor_options.update({"num_cpus": num_cpus})
 
-    def _update_memory(self, memory: float):
+        return self
+
+    def update_memory(self, memory: float):
         if self._deployment.ray_actor_options is not None:
             self._deployment.ray_actor_options.update({"memory": memory})
 
-    def _update_num_gpus(self, num_gpus: float):
+        return self
+
+    def update_num_gpus(self, num_gpus: float):
         if self._deployment.ray_actor_options is not None:
             self._deployment.ray_actor_options.update({"num_gpus": num_gpus})
 
+        return self
+
     def _determine_vram_usage(self, model_path: str, total_vram: str):
+        warn(
+            "determine vram usage based on file size will soon be removed",
+            PendingDeprecationWarning,
+        )
         if total_vram == "":
             return 0.25
         if os.path.isfile(model_path):
@@ -75,6 +85,10 @@ def _determine_vram_usage(self, model_path: str, total_vram: str):
         raise ModelPathException
 
     def _determine_ram_usage(self, model_path: str):
+        warn(
+            "determine ram usage based on file size will soon be removed",
+            PendingDeprecationWarning,
+        )
         if os.path.isfile(model_path):
             return max(
                 RAM_MINIMUM_RESERVE * (1024 * 1024 * 1024),
@@ -124,13 +138,13 @@ def deploy(self, model_folder_path: str, ray_addr: str, total_vram: str):
         if self.use_gpu:
             if model_name in MODEL_VRAM_OVERRIDE_LIST:
-                self._update_num_gpus(MODEL_VRAM_OVERRIDE_LIST[model_name])
+                self.update_num_gpus(MODEL_VRAM_OVERRIDE_LIST[model_name])
             else:
-                self._update_num_gpus(
+                self.update_num_gpus(
                     self._determine_vram_usage(model_folder_path, total_vram)
                 )
         else:
-            self._update_memory(self._determine_ram_usage(model_folder_path))
+            self.update_memory(self._determine_ram_usage(model_folder_path))
 
         if model_name in MODEL_VRAM_OVERRIDE_LIST:
             self.update_min_replicas(1)
             self.update_max_replicas(1)
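Since the resource setters are now public and each returns `self`, a model author can size a deployment directly when building the handle. A rough sketch, reusing the illustrative `MyModel` deployment from the note after PATCH 1/8:

```python
# Reserve 1 CPU, half a GPU and 8 GiB of RAM per replica (illustrative numbers).
entrypoint = (
    InstillDeployable(MyModel, use_gpu=True)
    .update_num_cpus(1)
    .update_num_gpus(0.5)        # fraction of a GPU per replica
    .update_memory(8 * 1024**3)  # bytes, forwarded to ray_actor_options["memory"]
    .update_max_replicas(2)
    .get_deployment_handle()
)
```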
From 54998861b58b9fe4c0cea0600b3b8adf3c26ca94 Mon Sep 17 00:00:00 2001
From: Heiru Wu
Date: Wed, 6 Mar 2024 00:30:01 +0800
Subject: [PATCH 5/8] fix(ray): add forcerm to avoid missing packages

---
 instill/helpers/build.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/instill/helpers/build.py b/instill/helpers/build.py
index cb21212..2c0c1ad 100644
--- a/instill/helpers/build.py
+++ b/instill/helpers/build.py
@@ -46,6 +46,7 @@
             path="./",
             rm=True,
             nocache=True,
+            forcerm=True,
             tag=f"{repo}:{tag}",
             buildargs={
                 "RAY_VERSION": ray_version,

From 727c9d3f2939698418236a3aa5971fc9d4e9abcc Mon Sep 17 00:00:00 2001
From: HR Wu <5631010+heiruwu@users.noreply.github.com>
Date: Wed, 6 Mar 2024 17:35:45 +0800
Subject: [PATCH 6/8] fix(ray): show build logs and add pip timeout (#111)

Because

- It is hard to know what went wrong without build logs
- pip install tends to time out for large package installations

This commit

- print build logs
- add default timeout for pip package installation

---
 instill/helpers/Dockerfile | 2 +-
 instill/helpers/build.py | 6 ++++--
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/instill/helpers/Dockerfile b/instill/helpers/Dockerfile
index 7ea801b..9d487d9 100644
--- a/instill/helpers/Dockerfile
+++ b/instill/helpers/Dockerfile
@@ -8,7 +8,7 @@ RUN sudo apt-get update && sudo apt-get install curl -y
 
 ARG PACKAGES
 RUN for package in ${PACKAGES}; do \
-    pip install $package; \
+    pip install --default-timeout=1000 --no-cache-dir $package; \
     done;
 
 WORKDIR /home/ray
diff --git a/instill/helpers/build.py b/instill/helpers/build.py
index 2c0c1ad..80d9a93 100644
--- a/instill/helpers/build.py
+++ b/instill/helpers/build.py
@@ -42,7 +42,7 @@
         packages_str += f"instill-sdk=={instill_version}"
 
         Logger.i("[Instill Builder] Building model image...")
-        img, _ = client.images.build(
+        img, logs = client.images.build(
             path="./",
             rm=True,
             nocache=True,
@@ -56,10 +56,12 @@
             quiet=False,
         )
         Logger.i(f"[Instill Builder] {registry}/{repo}:{tag} built")
+        for line in logs:
+            print(*line.values())
         img.tag(f"{registry}/{repo}", tag)
-        client.images.remove(f"{repo}:{tag}")
         Logger.i("[Instill Builder] Pushing model image...")
         client.images.push(f"{registry}/{repo}", tag=tag)
+        client.images.remove(f"{repo}:{tag}")
         Logger.i(f"[Instill Builder] {registry}/{repo}:{tag} pushed")
     except Exception as e:
         Logger.e("[Instill Builder] Build failed")

From 20992afcbc32031d48a1d4fee19029729e3f2506 Mon Sep 17 00:00:00 2001
From: HR Wu <5631010+heiruwu@users.noreply.github.com>
Date: Wed, 6 Mar 2024 17:48:19 +0800
Subject: [PATCH 7/8] docs(readme,notebook): update description (#112)

Because

- we removed the restriction on the model folder structure after deprecating Triton model support

This commit

- update custom model guide

---
 README.md | 4 ++--
 notebooks/serve_custom_chat_model.ipynb | 32 ++++++++++++-------------
 2 files changed, 17 insertions(+), 19 deletions(-)

diff --git a/README.md b/README.md
index 9769c51..b3325f9 100644
--- a/README.md
+++ b/README.md
@@ -14,8 +14,8 @@ Welcome to Instill Python SDK, where the world of AI-first application comes ali
 
 Before you jump into creating your first application with this SDK tool, we recommend you to get familiar with the core concepts of Instill Product first. You can check out our documentation here:
 
-- [Instill Core](http://instill.tech/docs/core/v1.0.0/welcome)
-- [Instill SDK](http://instill.tech/docs/sdk/v1.0.0/welcome)
+- [Instill Core](https://www.instill.tech/docs/latest/core/concepts)
+- [Instill SDK](https://www.instill.tech/docs/latest/sdk/python)
 
 ## Setup
 
diff --git a/notebooks/serve_custom_chat_model.ipynb b/notebooks/serve_custom_chat_model.ipynb
index 344f4a4..2d696fb 100644
--- a/notebooks/serve_custom_chat_model.ipynb
+++ b/notebooks/serve_custom_chat_model.ipynb
@@ -11,15 +11,13 @@
     "1. First we need to create a file structure like the following\n",
     "\n",
     "```bash\n",
-    ".\n",
-    "├── README.md\n",
-    "└── tiny_llama <=== your model name\n",
-    "    └── 1 <=== your model version\n",
-    "        ├── model.py <=== your model file\n",
-    "        └── tinyllama <=== model weights and dependecy folder clone from huggingface (remember to follow the LICENSE of each model)\n",
+    ". 
<=== your model folder\n", + "├── README.md <=== your model README\n", + "├── model.py <=== your model file\n", + "└── tinyllama <=== model weights and dependecy folder clone from huggingface (remember to follow the LICENSE of each model)\n", "```\n", "\n", - "Within the `README.md` you will have to put in the info about the model inbetween the `---` section, and a brief intro down below. For example\n", + "Within the `README.md` you will have to put in the info about the model in-between the `---` section, and a brief intro down below. For example\n", "```\n", "---\n", "Task: TextGenerationChat\n", @@ -28,7 +26,7 @@ " - TinyLlama-1.1B-Chat\n", "---\n", "\n", - "Learn more about it [here](https://www.instill.tech/docs/v0.9.0-beta/model/prepare#model-card-metadata)\n", + "Learn more about it [here](https://www.instill.tech/docs/latest/model/prepare#model-card-metadata)\n", "\n", "# Model-TinyLlama-1.1b-chat-dvc\n", "\n", @@ -40,7 +38,7 @@ "git lfs install\n", "git clone https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0 $PROJECT_ROOT/{modelname}/{version}/tinyllama\n", "```\n", - "3. Next, we start writting our model file, which with the help of the SDK, is relatively similar to what you would expect when developing in your local environment." + "3. Next, we start writing our model file, which with the help of the SDK, is relatively similar to what you would expect when developing in your local environment." ] }, { @@ -74,12 +72,12 @@ "class TinyLlama:\n", " # within the __init__ function, setup the model instance with the desired framework, in this\n", " # case is the pipeline from transformers\n", - " def __init__(self, model_path: str):\n", + " def __init__(self):\n", " self.pipeline = pipeline(\n", " \"text-generation\",\n", - " model=model_path,\n", - " torch_dtype=torch.float32,\n", - " device_map=\"cpu\",\n", + " model=\"tinyllama\",\n", + " torch_dtype=torch.bfloat16,\n", + " device_map=\"auto\",\n", " )\n", "\n", " # ModelMetadata tells the server what inputs and outputs the model is expecting\n", @@ -211,17 +209,17 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "5. Finally, we can pack it up and serve it on `Instill Model`! Simply\n", + "5. Finally, we can pack it up and serve it on `Instill Core`! 
Simply\n", "```bash\n", "zip -r \"tiny-llama.zip\" .\n", "```\n", "Or alternatively, if you have a LFS server or DVC bucket setup somewhere, you can also push the files along with the `.dvc` or lfs files onto github, and use our github import.\n", "\n", - "Now go to `Model Hub` page on Instill console and create a model from local with this zip, and profit!\n", + "Now go to `Model` page on Instill console and create a model from local with this zip, and profit!\n", "\n", "Here is a sample request and response with this model\n", "\n", - "_*req:*_\n", + "_req:_\n", "```bash\n", "curl --location 'http://localhost:8080/model/v1alpha/users/admin/models/tinyllama/trigger' \\\n", "--header 'Content-Type: application/json' \\\n", @@ -243,7 +241,7 @@ " ]\n", "}'\n", "```\n", - "_*resp:*_\n", + "_resp:_\n", "```json\n", "{\n", " \"task\": \"TASK_TEXT_GENERATION_CHAT\",\n", From 4c5da047faf1b12a2c6be414d8ea0a02aa998439 Mon Sep 17 00:00:00 2001 From: HR Wu <5631010+heiruwu@users.noreply.github.com> Date: Tue, 12 Mar 2024 06:44:01 +0800 Subject: [PATCH 8/8] feat(ray): separate build and push functionality (#113) Because - user may want to push to multiple registries, it is undesirable to define in `instill.yaml` This commit - separate `build` and `push` script - remove `registry` from model config --- instill/helpers/build.py | 10 ++----- instill/helpers/push.py | 56 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 8 deletions(-) create mode 100644 instill/helpers/push.py diff --git a/instill/helpers/build.py b/instill/helpers/build.py index 80d9a93..f7a1cca 100644 --- a/instill/helpers/build.py +++ b/instill/helpers/build.py @@ -23,7 +23,6 @@ config = yaml.safe_load(f) build = config["build"] - registry = config["registry"] repo = config["repo"] tag = config["tag"] @@ -55,17 +54,12 @@ }, quiet=False, ) - Logger.i(f"[Instill Builder] {registry}/{repo}:{tag} built") for line in logs: print(*line.values()) - img.tag(f"{registry}/{repo}", tag) - Logger.i("[Instill Builder] Pushing model image...") - client.images.push(f"{registry}/{repo}", tag=tag) - client.images.remove(f"{repo}:{tag}") - Logger.i(f"[Instill Builder] {registry}/{repo}:{tag} pushed") + Logger.i(f"[Instill Builder] {repo}:{tag} built") except Exception as e: Logger.e("[Instill Builder] Build failed") Logger.e(e) finally: os.remove("Dockerfile") - Logger.i("[Instill Builder] Build successful") + Logger.i("[Instill Builder] Done") diff --git a/instill/helpers/push.py b/instill/helpers/push.py new file mode 100644 index 0000000..62cae6d --- /dev/null +++ b/instill/helpers/push.py @@ -0,0 +1,56 @@ +import argparse +import types + +import docker +import yaml + +from instill.utils.logger import Logger + +if __name__ == "__main__": + Logger.i("[Instill Builder] Setup docker...") + client = docker.from_env() + + parser = argparse.ArgumentParser() + parser.add_argument( + "-u", + "--url", + help="image registry url, in the format of host:port, default to docker.io", + default="docker.io", + required=False, + ) + + try: + args = parser.parse_args() + + Logger.i("[Instill Builder] Loading config file...") + with open("instill.yaml", "r", encoding="utf8") as f: + Logger.i("[Instill Builder] Parsing config file...") + config = yaml.safe_load(f) + + registry = args.url + repo = config["repo"] + tag = config["tag"] + + img = client.images.get(name=f"{repo}:{tag}") + img.tag(f"{registry}/{repo}", tag) + Logger.i("[Instill Builder] Pushing model image...") + logs = client.images.push(f"{registry}/{repo}", tag=tag) + if 
isinstance(logs, types.GeneratorType): + for line in logs: + print(*line.values()) + elif isinstance(logs, list): + for line in logs: + if "errorDetail" in line: + raise RuntimeError(line["errorDetail"]["message"]) + print(line) + else: + if "errorDetail" in logs: + err = logs.split('{"errorDetail":{"message":', 1)[1][1:-4] + raise RuntimeError(err) + print(logs) + Logger.i(f"[Instill Builder] {registry}/{repo}:{tag} pushed") + except Exception as e: + Logger.e("[Instill Builder] Push failed") + Logger.e(e) + finally: + Logger.i("[Instill Builder] Done")
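With the build and push steps separated, a typical flow is to build the image once and push it to any number of registries. One way to invoke the two scripts, assuming they are run as modules from the directory that holds `instill.yaml` (the invocation path and registry URL are illustrative, not defined by these patches):

```bash
# Build <repo>:<tag> locally from instill.yaml, model.py and the weight files
python -m instill.helpers.build

# Re-tag and push to a specific registry; defaults to docker.io when --url is omitted
python -m instill.helpers.push --url localhost:5000
```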