diff --git a/README.md b/README.md
index 51a467ae7..9e214617a 100644
--- a/README.md
+++ b/README.md
@@ -35,105 +35,79 @@ OpenLLM supports a wide range of state-of-the-art open-source LLMs. You can also
Start a Server |
- | deepseek-r1 |
- 671B |
- 80Gx16 |
- openllm serve deepseek-r1:671b-fc3d |
-
-
- | deepseek-r1-distill |
- 14B |
- 80G |
- openllm serve deepseek-r1-distill:qwen2.5-14b-98a9 |
-
-
- | deepseek-v3 |
- 671B |
- 80Gx16 |
- openllm serve deepseek-v3:671b-instruct-d7ec |
+ deepseek |
+ 8B |
+ 24GB |
+ openllm serve deepseek:r1-distill-llama3.1-8b-626a |
| gemma2 |
2B |
12G |
- openllm serve gemma2:2b-instruct-747d |
+ openllm serve gemma2:2b-instruct-868c |
+
+
+ | hermes-3 |
+ 8B |
+ 80G |
+ openllm serve hermes-3:deep-llama3-8b-1242 |
| llama3.1 |
8B |
24G |
- openllm serve llama3.1:8b-instruct-3c0c |
+ openllm serve llama3.1:8b-instruct-a995 |
| llama3.2 |
1B |
24G |
- openllm serve llama3.2:1b-instruct-f041 |
+ openllm serve llama3.2:1b-instruct-6fa1 |
| llama3.3 |
70B |
80Gx2 |
- openllm serve llama3.3:70b-instruct-b850 |
+ openllm serve llama3.3:70b-instruct-f791 |
| mistral |
8B |
24G |
- openllm serve mistral:8b-instruct-50e8 |
+ openllm serve mistral:8b-instruct-f4ed |
| mistral-large |
123B |
80Gx4 |
- openllm serve mistral-large:123b-instruct-1022 |
-
-
- | mistralai |
- 24B |
- 80G |
- openllm serve mistralai:24b-small-instruct-2501-0e69 |
-
-
- | mixtral |
- 7B |
- 80Gx2 |
- openllm serve mixtral:8x7b-instruct-v0.1-b752 |
+ openllm serve mistral-large:123b-instruct-2407-e1ef |
| phi4 |
14B |
80G |
- openllm serve phi4:14b-c12d |
+ openllm serve phi4:14b-a515 |
| pixtral |
12B |
80G |
- openllm serve pixtral:12b-240910-c344 |
+ openllm serve pixtral:12b-2409-a2e0 |
| qwen2.5 |
7B |
24G |
- openllm serve qwen2.5:7b-instruct-3260 |
+ openllm serve qwen2.5:7b-instruct-dbe1 |
| qwen2.5-coder |
- 7B |
- 24G |
- openllm serve qwen2.5-coder:7b-instruct-e75d |
-
-
- | qwen2.5vl |
3B |
24G |
- openllm serve qwen2.5vl:3b-instruct-4686 |
+ openllm serve qwen2.5-coder:3b-instruct-63b0 |
-...
-
For the full model list, see the [OpenLLM models repository](https://github.com/bentoml/openllm-models).
## Start an LLM server
@@ -151,7 +125,7 @@ To start an LLM server locally, use the `openllm serve` command and specify the
> ```
```bash
-openllm serve openllm serve llama3.2:1b-instruct-f041
+openllm serve llama3.2:1b-instruct-6fa1
```
The server will be accessible at [http://localhost:3000](http://localhost:3000/), providing OpenAI-compatible APIs for interaction. You can call the endpoints with different frameworks and tools that support OpenAI-compatible APIs. Typically, you may need to specify the following:
@@ -235,7 +209,7 @@ openllm repo update
To review a model’s information, run:
```bash
-openllm model get openllm serve llama3.2:1b-instruct-f041
+openllm model get llama3.2:1b-instruct-6fa1
```
### Add a model to the default model repository
@@ -263,7 +237,7 @@ OpenLLM supports LLM cloud deployment via BentoML, the unified model serving fra
[Sign up for BentoCloud](https://www.bentoml.com/) for free and [log in](https://docs.bentoml.com/en/latest/bentocloud/how-tos/manage-access-token.html). Then, run `openllm deploy` to deploy a model to BentoCloud:
```bash
-openllm deploy openllm serve llama3.2:1b-instruct-f041
+openllm deploy llama3.2:1b-instruct-6fa1
```
> [!NOTE]
@@ -296,3 +270,4 @@ This project uses the following open-source projects:
- [astral-sh/uv](https://github.com/astral-sh/uv) for blazing fast model requirements installing
We are grateful to the developers and contributors of these projects for their hard work and dedication.
+
diff --git a/README.md.tpl b/README.md.tpl
index 2bb38fc0b..cb162b792 100644
--- a/README.md.tpl
+++ b/README.md.tpl
@@ -44,7 +44,6 @@ OpenLLM supports a wide range of state-of-the-art open-source LLMs. You can also
{%- endfor %}
-...
For the full model list, see the [OpenLLM models repository](https://github.com/bentoml/openllm-models).
diff --git a/gen_readme.py b/gen_readme.py
index 8e360078b..964492aba 100644
--- a/gen_readme.py
+++ b/gen_readme.py
@@ -5,18 +5,6 @@
# "uv",
# ]
# ///
+import subprocess, sys, pathlib, json, jinja2
-import subprocess, sys, pathlib, json
-
-from jinja2 import Environment, FileSystemLoader
-
-wd = pathlib.Path('.').parent
-model_dict = subprocess.run(
- [sys.executable, '-m', 'uv', 'run', '--with-editable', '.', 'openllm', 'model', 'list', '--output', 'readme'],
- capture_output=True,
- text=True,
- check=True,
-)
-E = Environment(loader=FileSystemLoader('.'))
-with (wd / 'README.md').open('w') as f:
- f.write(E.get_template('README.md.tpl').render(model_dict=json.loads(model_dict.stdout.strip())))
+with (pathlib.Path('.').parent / 'README.md').open('w') as f: f.write(jinja2.Environment(loader=jinja2.FileSystemLoader('.')).get_template('README.md.tpl').render(model_dict=json.loads(subprocess.run([sys.executable, '-m', 'uv', 'run', '--with-editable', '.', 'openllm', 'model', 'list', '--output', 'readme'], text=True, check=True, capture_output=True).stdout.strip())))
diff --git a/uv.lock b/uv.lock
index 7dd74c400..b71126d8e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1191,7 +1191,7 @@ wheels = [
[[package]]
name = "openai"
-version = "1.61.1"
+version = "1.63.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -1203,9 +1203,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/d9/cf/61e71ce64cf0a38f029da0f9a5f10c9fa0e69a7a977b537126dac50adfea/openai-1.61.1.tar.gz", hash = "sha256:ce1851507218209961f89f3520e06726c0aa7d0512386f0f977e3ac3e4f2472e", size = 350784 }
+sdist = { url = "https://files.pythonhosted.org/packages/4f/32/2049e973a646801df425aecdf88c6504ca878bdb3951fe12076fc30f2977/openai-1.63.0.tar.gz", hash = "sha256:597d7a1b35b113e5a09fcb953bdb1eef44f404a39985f3d7573b3ab09221fd66", size = 356710 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/9a/b6/2e2a011b2dc27a6711376808b4cd8c922c476ea0f1420b39892117fa8563/openai-1.61.1-py3-none-any.whl", hash = "sha256:72b0826240ce26026ac2cd17951691f046e5be82ad122d20a8e1b30ca18bd11e", size = 463126 },
+ { url = "https://files.pythonhosted.org/packages/67/a0/e1fe4e87218639fc0a0927da5266c2978eaa0e2eb5437479ee64a11535bb/openai-1.63.0-py3-none-any.whl", hash = "sha256:a664dfc78f0a05ca46c3e21f344f840cf6bf7174f13cfa9de214ed28bfca1dda", size = 472282 },
]
[[package]]
@@ -1233,7 +1233,7 @@ requires-dist = [
{ name = "dulwich" },
{ name = "huggingface-hub" },
{ name = "nvidia-ml-py" },
- { name = "openai", specifier = "==1.61.1" },
+ { name = "openai", specifier = "==1.63.0" },
{ name = "pathlib" },
{ name = "pip-requirements-parser" },
{ name = "psutil" },