From 7e311d5499f2476bacd902ea037470f1d1e3587d Mon Sep 17 00:00:00 2001
From: Tony Wang <78333580+tonywang10101@users.noreply.github.com>
Date: Thu, 28 Mar 2024 17:36:07 +0800
Subject: [PATCH] feat(model): support new models (#457)

Because

- in model-backend 0.12.0, we are going to utilize four A100 40G GPUs

This commit

- updates the model config based on the following GPU allocation strategy:

GPU 1:
yolov7
mobilenetv2
yolov7-stomata
llava 13b (needs 26G VRAM)

GPU 2:
stable-diffusion-xl  (needs 16G VRAM)
controlnet-canny  (needs 16G VRAM)

GPU 3:
llama2-7b  (needs 16G VRAM)
llama2-7b-chat  (needs 16G VRAM)

GPU 4:
llamacode-7b  (needs 16G VRAM)
zephyr-7b  (needs 16G VRAM)
---
 model-hub/model_hub_gpu.json | 62 +++++++++++++++++++++---------------
 1 file changed, 36 insertions(+), 26 deletions(-)

diff --git a/model-hub/model_hub_gpu.json b/model-hub/model_hub_gpu.json
index 25cbeccb..4fd45416 100644
--- a/model-hub/model_hub_gpu.json
+++ b/model-hub/model_hub_gpu.json
@@ -29,6 +29,36 @@
       "tag": "v0.7.0-yolov7-mask-ray-gpu"
     }
   },
+  {
+    "id": "llava-1-6-13b",
+    "description": "LLaVa-13b, from liuhaotian, is trained to generate text based on your prompts with multimodal input.",
+    "task": "TASK_VISUAL_QUESTION_ANSWERING",
+    "model_definition": "model-definitions/github",
+    "configuration": {
+      "repository": "instill-ai/model-llava-13b-dvc",
+      "tag": "f16-gpuAuto-transformer-ray-v0.11.0"
+    }
+  },
+  {
+    "id": "stable-diffusion-xl",
+    "description": "Stable-Diffusion-XL, from StabilityAI, is trained to generate image based on your prompts.",
+    "task": "TASK_TEXT_TO_IMAGE",
+    "model_definition": "model-definitions/github",
+    "configuration": {
+      "repository": "instill-ai/model-diffusion-xl-dvc",
+      "tag": "f16-gpuAuto-diffusers-ray-v0.8.0"
+    }
+  },
+  {
+    "id": "controlnet-canny",
+    "description": "ControlNet-Canny Version, from Lvmin, is trained to generate image based on your prompts and images.",
+    "task": "TASK_IMAGE_TO_IMAGE",
+    "model_definition": "model-definitions/github",
+    "configuration": {
+      "repository": "instill-ai/model-controlnet-dvc",
+      "tag": "f16-gpuAuto-diffusers-ray-v0.8.0"
+    }
+  },
   {
     "id": "llama2-7b",
     "description": "Llama2-7b, from meta, is trained to generate text based on your prompts.",
@@ -49,16 +79,6 @@
       "tag": "f16-gpuAuto-transformer-ray-v0.8.0"
     }
   },
-  {
-    "id": "llava-1-6-13b",
-    "description": "LLaVa-13b, from liuhaotian, is trained to generate text based on your prompts with miltimodal input.",
-    "task": "TASK_VISUAL_QUESTION_ANSWERING",
-    "model_definition": "model-definitions/github",
-    "configuration": {
-      "repository": "instill-ai/model-llava-13b-dvc",
-      "tag": "f16-gpuAuto-transformer-ray-v0.11.0"
-    }
-  },
   {
     "id": "zephyr-7b",
     "description": "Zephyr-7b, from Huggingface, is trained to generate text based on your prompts.",
@@ -66,27 +86,17 @@
     "model_definition": "model-definitions/github",
     "configuration": {
       "repository": "instill-ai/model-zephyr-7b-dvc",
-      "tag": "f32-cpu-transformer-ray-v0.8.0"
-    }
-  },
-  {
-    "id": "stable-diffusion-xl",
-    "description": "Stable-Diffusion-XL, from StabilityAI, is trained to generate image based on your prompts.",
-    "task": "TASK_TEXT_TO_IMAGE",
-    "model_definition": "model-definitions/github",
-    "configuration": {
-      "repository": "instill-ai/model-diffusion-xl-dvc",
-      "tag": "f16-gpuAuto-diffusers-ray-v0.8.0"
+      "tag": "f16-1gpu-transformer-ray-v0.11.0"
     }
   },
   {
-    "id": "controlnet-canny",
-    "description": "ControlNet-Canny Version, from Lvmin, is trained to generate image based on your prompts and images.",
-    "task": "TASK_IMAGE_TO_IMAGE",
+    "id": "llamacode-7b",
+    "description": "CodeLlama-7b, from Meta, is trained to generate code and text based on your prompts.",
+    "task": "TASK_TEXT_GENERATION_CHAT",
     "model_definition": "model-definitions/github",
     "configuration": {
-      "repository": "instill-ai/model-controlnet-dvc",
-      "tag": "f16-gpuAuto-diffusers-ray-v0.8.0"
+      "repository": "instill-ai/model-codellama-7b-dvc",
+      "tag": "f16-1gpu-transformer-ray-v0.11.0"
     }
   }
 ]