4 changes: 2 additions & 2 deletions README.md
@@ -14,8 +14,8 @@ Welcome to Instill Python SDK, where the world of AI-first application comes alive

Before you jump into creating your first application with this SDK tool, we recommend you to get familiar with the core concepts of Instill Product first. You can check out our documentation here:

- [Instill Core](http://instill.tech/docs/core/v1.0.0/welcome)
- [Instill SDK](http://instill.tech/docs/sdk/v1.0.0/welcome)
- [Instill Core](https://www.instill.tech/docs/latest/core/concepts)
- [Instill SDK](https://www.instill.tech/docs/latest/sdk/python)

## Setup

15 changes: 15 additions & 0 deletions instill/helpers/Dockerfile
@@ -0,0 +1,15 @@
ARG RAY_VERSION
ARG PYTHON_VERSION
ARG CUDA_SUFFIX

FROM rayproject/ray:${RAY_VERSION}-py${PYTHON_VERSION}${CUDA_SUFFIX}

RUN sudo apt-get update && sudo apt-get install curl -y

ARG PACKAGES
RUN for package in ${PACKAGES}; do \
pip install --default-timeout=1000 --no-cache-dir $package; \
done;

WORKDIR /home/ray
COPY . .
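
For reference, the image this Dockerfile describes could also be built by hand with explicit build args; the `build.py` helper added below does the same thing programmatically. The version numbers, package list, and tag here are purely illustrative:

```bash
# Illustrative manual build; CUDA_SUFFIX is only needed for GPU images.
docker build \
  --build-arg RAY_VERSION=2.9.3 \
  --build-arg PYTHON_VERSION=310 \
  --build-arg CUDA_SUFFIX=-cu121 \
  --build-arg PACKAGES="transformers==4.36.2" \
  -t admin/tinyllama:0.1.0 .
```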
65 changes: 65 additions & 0 deletions instill/helpers/build.py
@@ -0,0 +1,65 @@
import os
import shutil

import docker
import ray
import yaml

import instill
from instill.helpers.const import DEFAULT_DEPENDENCIES
from instill.utils.logger import Logger

if __name__ == "__main__":
Logger.i("[Instill Builder] Setup docker...")
client = docker.from_env()
shutil.copyfile(
__file__.replace("build.py", "Dockerfile"), os.getcwd() + "/Dockerfile"
)

try:
Logger.i("[Instill Builder] Loading config file...")
with open("instill.yaml", "r", encoding="utf8") as f:
Logger.i("[Instill Builder] Parsing config file...")
config = yaml.safe_load(f)

build = config["build"]
repo = config["repo"]
tag = config["tag"]

python_version = build["python_version"].replace(".", "")
ray_version = ray.__version__
instill_version = instill.__version__

cuda_suffix = "" if not build["gpu"] else "-cu121"

packages_str = ""
if not build["python_packages"] is None:
for p in build["python_packages"]:
packages_str += p + " "
for p in DEFAULT_DEPENDENCIES:
packages_str += p + " "
packages_str += f"instill-sdk=={instill_version}"

Logger.i("[Instill Builder] Building model image...")
img, logs = client.images.build(
path="./",
rm=True,
nocache=True,
forcerm=True,
tag=f"{repo}:{tag}",
buildargs={
"RAY_VERSION": ray_version,
"PYTHON_VERSION": python_version,
"PACKAGES": packages_str,
},
quiet=False,
)
for line in logs:
print(*line.values())
Logger.i(f"[Instill Builder] {repo}:{tag} built")
except Exception as e:
Logger.e("[Instill Builder] Build failed")
Logger.e(e)
finally:
os.remove("Dockerfile")
Logger.i("[Instill Builder] Done")
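
For context, the script above expects an `instill.yaml` in the working directory that provides `repo`, `tag`, and a `build` section. A minimal sketch of such a file, with placeholder values only, might look like:

```yaml
# Sketch of the keys build.py reads; the values are illustrative, not SDK defaults.
repo: admin/tinyllama          # image repository, combined with tag as <repo>:<tag>
tag: 0.1.0                     # image tag
build:
  gpu: false                   # selects the CUDA-suffixed Ray base image when true
  python_version: "3.10"       # dots are stripped before it reaches the Dockerfile
  python_packages:             # extra pip packages installed on top of DEFAULT_DEPENDENCIES
    - transformers==4.36.2
    - torch==2.1.2
```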
2 changes: 2 additions & 0 deletions instill/helpers/const.py
@@ -118,3 +118,5 @@ class VisualQuestionAnsweringInput:
"llama2-7b": 0.4,
"zephyr-7b": 0.4,
}

DEFAULT_DEPENDENCIES = ["protobuf==4.25.3", "grpcio-tools==1.62.0"]
56 changes: 56 additions & 0 deletions instill/helpers/push.py
@@ -0,0 +1,56 @@
import argparse
import types

import docker
import yaml

from instill.utils.logger import Logger

if __name__ == "__main__":
Logger.i("[Instill Builder] Setup docker...")
client = docker.from_env()

parser = argparse.ArgumentParser()
parser.add_argument(
"-u",
"--url",
help="image registry url, in the format of host:port, default to docker.io",
default="docker.io",
required=False,
)

try:
args = parser.parse_args()

Logger.i("[Instill Builder] Loading config file...")
with open("instill.yaml", "r", encoding="utf8") as f:
Logger.i("[Instill Builder] Parsing config file...")
config = yaml.safe_load(f)

registry = args.url
repo = config["repo"]
tag = config["tag"]

img = client.images.get(name=f"{repo}:{tag}")
img.tag(f"{registry}/{repo}", tag)
Logger.i("[Instill Builder] Pushing model image...")
logs = client.images.push(f"{registry}/{repo}", tag=tag)
if isinstance(logs, types.GeneratorType):
for line in logs:
print(*line.values())
elif isinstance(logs, list):
for line in logs:
if "errorDetail" in line:
raise RuntimeError(line["errorDetail"]["message"])
print(line)
else:
if "errorDetail" in logs:
err = logs.split('{"errorDetail":{"message":', 1)[1][1:-4]
raise RuntimeError(err)
print(logs)
Logger.i(f"[Instill Builder] {registry}/{repo}:{tag} pushed")
except Exception as e:
Logger.e("[Instill Builder] Push failed")
Logger.e(e)
finally:
Logger.i("[Instill Builder] Done")
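
Taken together with `build.py`, a plausible end-to-end flow is sketched below, assuming the helpers are invoked directly as modules from the model folder containing `instill.yaml` (the PR does not show a dedicated CLI entry point, and the registry address is illustrative):

```bash
# Build the image described by ./instill.yaml, then push it to a registry.
# --url defaults to docker.io when omitted.
python -m instill.helpers.build
python -m instill.helpers.push --url localhost:5000
```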
68 changes: 47 additions & 21 deletions instill/helpers/ray_config.py
@@ -1,5 +1,6 @@
import os
from typing import Callable, Optional
from warnings import warn

import ray
from ray import serve
@@ -25,32 +26,40 @@ class InstillDeployable:
def __init__(
self,
deployable: Deployment,
model_weight_or_folder_name: str,
use_gpu: bool,
) -> None:
self._deployment: Deployment = deployable
self.use_gpu = use_gpu
# params
self.model_weight_or_folder_name: str = model_weight_or_folder_name
if use_gpu:
self._update_num_cpus(0.25)
self._update_num_gpus(0.2)
self.update_num_cpus(0.25)
self.update_num_gpus(0.2)
else:
self._update_num_cpus(0.25)
self.update_num_cpus(0.25)

def _update_num_cpus(self, num_cpus: float):
def update_num_cpus(self, num_cpus: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"num_cpus": num_cpus})

def _update_memory(self, memory: float):
return self

def update_memory(self, memory: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"memory": memory})

def _update_num_gpus(self, num_gpus: float):
return self

def update_num_gpus(self, num_gpus: float):
if self._deployment.ray_actor_options is not None:
self._deployment.ray_actor_options.update({"num_gpus": num_gpus})

return self

def _determine_vram_usage(self, model_path: str, total_vram: str):
warn(
"determine vram usage base on file size will soon be removed",
PendingDeprecationWarning,
)
if total_vram == "":
return 0.25
if os.path.isfile(model_path):
Expand All @@ -76,6 +85,10 @@ def _determine_vram_usage(self, model_path: str, total_vram: str):
raise ModelPathException

def _determine_ram_usage(self, model_path: str):
warn(
"determine ram usage base on file size will soon be removed",
PendingDeprecationWarning,
)
if os.path.isfile(model_path):
return max(
RAM_MINIMUM_RESERVE * (1024 * 1024 * 1024),
@@ -95,52 +108,65 @@ def update_min_replicas(self, num_replicas: int):
autoscaling_config=new_autoscaling_config
)

return self

def update_max_replicas(self, num_replicas: int):
new_autoscaling_config = DEFAULT_AUTOSCALING_CONFIG
new_autoscaling_config["max_replicas"] = num_replicas
self._deployment = self._deployment.options(
autoscaling_config=new_autoscaling_config
)

return self

def get_deployment_handle(self):
return self._deployment.bind()

def deploy(self, model_folder_path: str, ray_addr: str, total_vram: str):
warn(
"Deploy/Undeploy will soon be remove from the scope of SDK",
PendingDeprecationWarning,
)
if not ray.is_initialized():
ray_addr = "ray://" + ray_addr.replace("9000", "10001")
ray.init(address=ray_addr, runtime_env=DEFAULT_RUNTIME_ENV)

# /model-repository/{owner_type}/{owner_uid}/{model_id}/{weight}
model_path = "/".join([model_folder_path, self.model_weight_or_folder_name])
model_path_string_parts = model_path.split("/")
application_name = "_".join(model_path_string_parts[3:5])
# /model-repository/{owner_type}/{owner_uid}/{model_id}
model_path_string_parts = model_folder_path.split("/")
application_name = "_".join(model_path_string_parts[3:])
model_name = application_name.split("_")[1]

if self.use_gpu:
if model_name in MODEL_VRAM_OVERRIDE_LIST:
self._update_num_gpus(MODEL_VRAM_OVERRIDE_LIST[model_name])
self.update_num_gpus(MODEL_VRAM_OVERRIDE_LIST[model_name])
else:
self._update_num_gpus(
self._determine_vram_usage(model_path, total_vram)
self.update_num_gpus(
self._determine_vram_usage(model_folder_path, total_vram)
)
else:
self._update_memory(self._determine_ram_usage(model_path))
self.update_memory(self._determine_ram_usage(model_folder_path))

if model_name in MODEL_VRAM_OVERRIDE_LIST:
self.update_min_replicas(1)
self.update_max_replicas(1)

serve.run(
self._deployment.options(name=model_name).bind(model_path),
self._deployment.options(name=model_name).bind(),
name=application_name,
route_prefix=f"/{application_name}",
)

def undeploy(self, model_folder_path: str, ray_addr: str):
warn(
"Deploy/Undeploy will soon be remove from the scope of SDK",
PendingDeprecationWarning,
)
if not ray.is_initialized():
ray_addr = "ray://" + ray_addr.replace("9000", "10001")
ray.init(address=ray_addr, runtime_env=DEFAULT_RUNTIME_ENV)
# /model-repository/{owner_type}/{owner_uid}/{model_id}/{weight}
model_path = "/".join([model_folder_path, self.model_weight_or_folder_name])
model_path_string_parts = model_path.split("/")
application_name = "_".join(model_path_string_parts[3:5])
# /model-repository/{owner_type}/{owner_uid}/{model_id}
model_path_string_parts = model_folder_path.split("/")
application_name = "_".join(model_path_string_parts[3:])
serve.delete(application_name)

def __call__(self):
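
Because the renamed `update_*` methods now return `self`, resource options can be chained fluently before binding the deployment. A minimal sketch, assuming the SDK is installed and using a trivial stand-in Ray Serve deployment in place of a real model class (the numbers are illustrative, not recommended values):

```python
from ray import serve

from instill.helpers.ray_config import InstillDeployable


@serve.deployment
class TinyLlama:  # stand-in for a real model deployment class
    def __call__(self, prompt: str) -> str:
        return prompt


# Chain the fluent update_* methods, then bind the deployment handle.
deployable = (
    InstillDeployable(TinyLlama, use_gpu=True)
    .update_num_cpus(1)
    .update_num_gpus(0.5)
    .update_max_replicas(2)
)
entrypoint = deployable.get_deployment_handle()
```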
32 changes: 15 additions & 17 deletions notebooks/serve_custom_chat_model.ipynb
@@ -11,15 +11,13 @@
"1. First we need to create a file structure like the following\n",
"\n",
"```bash\n",
".\n",
"├── README.md\n",
"└── tiny_llama <=== your model name\n",
" └── 1 <=== your model version\n",
" ├── model.py <=== your model file\n",
" └── tinyllama <=== model weights and dependecy folder clone from huggingface (remember to follow the LICENSE of each model)\n",
". <=== your model folder\n",
"├── README.md <=== your model README\n",
"├── model.py <=== your model file\n",
"└── tinyllama <=== model weights and dependecy folder clone from huggingface (remember to follow the LICENSE of each model)\n",
"```\n",
"\n",
"Within the `README.md` you will have to put in the info about the model inbetween the `---` section, and a brief intro down below. For example\n",
"Within the `README.md` you will have to put in the info about the model in-between the `---` section, and a brief intro down below. For example\n",
"```\n",
"---\n",
"Task: TextGenerationChat\n",
@@ -28,7 +26,7 @@
" - TinyLlama-1.1B-Chat\n",
"---\n",
"\n",
"Learn more about it [here](https://www.instill.tech/docs/v0.9.0-beta/model/prepare#model-card-metadata)\n",
"Learn more about it [here](https://www.instill.tech/docs/latest/model/prepare#model-card-metadata)\n",
"\n",
"# Model-TinyLlama-1.1b-chat-dvc\n",
"\n",
@@ -40,7 +38,7 @@
"git lfs install\n",
"git clone https://huggingface.co/TinyLlama/TinyLlama-1.1B-Chat-v1.0 $PROJECT_ROOT/{modelname}/{version}/tinyllama\n",
"```\n",
"3. Next, we start writting our model file, which with the help of the SDK, is relatively similar to what you would expect when developing in your local environment."
"3. Next, we start writing our model file, which with the help of the SDK, is relatively similar to what you would expect when developing in your local environment."
]
},
{
@@ -74,12 +72,12 @@
"class TinyLlama:\n",
" # within the __init__ function, setup the model instance with the desired framework, in this\n",
" # case is the pipeline from transformers\n",
" def __init__(self, model_path: str):\n",
" def __init__(self):\n",
" self.pipeline = pipeline(\n",
" \"text-generation\",\n",
" model=model_path,\n",
" torch_dtype=torch.float32,\n",
" device_map=\"cpu\",\n",
" model=\"tinyllama\",\n",
" torch_dtype=torch.bfloat16,\n",
" device_map=\"auto\",\n",
" )\n",
"\n",
" # ModelMetadata tells the server what inputs and outputs the model is expecting\n",
@@ -211,17 +209,17 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"5. Finally, we can pack it up and serve it on `Instill Model`! Simply\n",
"5. Finally, we can pack it up and serve it on `Instill Core`! Simply\n",
"```bash\n",
"zip -r \"tiny-llama.zip\" .\n",
"```\n",
"Or alternatively, if you have a LFS server or DVC bucket setup somewhere, you can also push the files along with the `.dvc` or lfs files onto github, and use our github import.\n",
"\n",
"Now go to `Model Hub` page on Instill console and create a model from local with this zip, and profit!\n",
"Now go to `Model` page on Instill console and create a model from local with this zip, and profit!\n",
"\n",
"Here is a sample request and response with this model\n",
"\n",
"_*req:*_\n",
"_req:_\n",
"```bash\n",
"curl --location 'http://localhost:8080/model/v1alpha/users/admin/models/tinyllama/trigger' \\\n",
"--header 'Content-Type: application/json' \\\n",
Expand All @@ -243,7 +241,7 @@
" ]\n",
"}'\n",
"```\n",
"_*resp:*_\n",
"_resp:_\n",
"```json\n",
"{\n",
" \"task\": \"TASK_TEXT_GENERATION_CHAT\",\n",