diff --git a/README.md b/README.md
index 51a467ae7..9e214617a 100644
--- a/README.md
+++ b/README.md
@@ -35,105 +35,79 @@ OpenLLM supports a wide range of state-of-the-art open-source LLMs. You can also
Start a Server |
- | deepseek-r1 |
- 671B |
- 80Gx16 |
- openllm serve deepseek-r1:671b-fc3d |
-
-
- | deepseek-r1-distill |
- 14B |
- 80G |
- openllm serve deepseek-r1-distill:qwen2.5-14b-98a9 |
-
-
- | deepseek-v3 |
- 671B |
- 80Gx16 |
- openllm serve deepseek-v3:671b-instruct-d7ec |
+ deepseek |
+ 8B |
+ 24GB |
+ openllm serve deepseek:r1-distill-llama3.1-8b-626a |
| gemma2 |
2B |
12G |
- openllm serve gemma2:2b-instruct-747d |
+ openllm serve gemma2:2b-instruct-868c |
+
+
+ | hermes-3 |
+ 8B |
+ 80G |
+ openllm serve hermes-3:deep-llama3-8b-1242 |
| llama3.1 |
8B |
24G |
- openllm serve llama3.1:8b-instruct-3c0c |
+ openllm serve llama3.1:8b-instruct-a995 |
| llama3.2 |
1B |
24G |
- openllm serve llama3.2:1b-instruct-f041 |
+ openllm serve llama3.2:1b-instruct-6fa1 |
| llama3.3 |
70B |
80Gx2 |
- openllm serve llama3.3:70b-instruct-b850 |
+ openllm serve llama3.3:70b-instruct-f791 |
| mistral |
8B |
24G |
- openllm serve mistral:8b-instruct-50e8 |
+ openllm serve mistral:8b-instruct-f4ed |
| mistral-large |
123B |
80Gx4 |
- openllm serve mistral-large:123b-instruct-1022 |
-
-
- | mistralai |
- 24B |
- 80G |
- openllm serve mistralai:24b-small-instruct-2501-0e69 |
-
-
- | mixtral |
- 7B |
- 80Gx2 |
- openllm serve mixtral:8x7b-instruct-v0.1-b752 |
+ openllm serve mistral-large:123b-instruct-2407-e1ef |
| phi4 |
14B |
80G |
- openllm serve phi4:14b-c12d |
+ openllm serve phi4:14b-a515 |
| pixtral |
12B |
80G |
- openllm serve pixtral:12b-240910-c344 |
+ openllm serve pixtral:12b-2409-a2e0 |
| qwen2.5 |
7B |
24G |
- openllm serve qwen2.5:7b-instruct-3260 |
+ openllm serve qwen2.5:7b-instruct-dbe1 |
| qwen2.5-coder |
- 7B |
- 24G |
- openllm serve qwen2.5-coder:7b-instruct-e75d |
-
-
- | qwen2.5vl |
3B |
24G |
- openllm serve qwen2.5vl:3b-instruct-4686 |
+ openllm serve qwen2.5-coder:3b-instruct-63b0 |
-...
-
For the full model list, see the [OpenLLM models repository](https://github.com/bentoml/openllm-models).
## Start an LLM server
@@ -151,7 +125,7 @@ To start an LLM server locally, use the `openllm serve` command and specify the
> ```
```bash
-openllm serve openllm serve llama3.2:1b-instruct-f041
+openllm serve llama3.2:1b-instruct-6fa1
```
The server will be accessible at [http://localhost:3000](http://localhost:3000/), providing OpenAI-compatible APIs for interaction. You can call the endpoints with different frameworks and tools that support OpenAI-compatible APIs. Typically, you may need to specify the following:
@@ -235,7 +209,7 @@ openllm repo update
To review a model’s information, run:
```bash
-openllm model get openllm serve llama3.2:1b-instruct-f041
+openllm model get llama3.2:1b-instruct-6fa1
```
### Add a model to the default model repository
@@ -263,7 +237,7 @@ OpenLLM supports LLM cloud deployment via BentoML, the unified model serving fra
[Sign up for BentoCloud](https://www.bentoml.com/) for free and [log in](https://docs.bentoml.com/en/latest/bentocloud/how-tos/manage-access-token.html). Then, run `openllm deploy` to deploy a model to BentoCloud:
```bash
-openllm deploy openllm serve llama3.2:1b-instruct-f041
+openllm deploy llama3.2:1b-instruct-6fa1
```
> [!NOTE]
@@ -296,3 +270,4 @@ This project uses the following open-source projects:
- [astral-sh/uv](https://github.com/astral-sh/uv) for blazing fast model requirements installing
We are grateful to the developers and contributors of these projects for their hard work and dedication.
+
diff --git a/README.md.tpl b/README.md.tpl
index 2bb38fc0b..cb162b792 100644
--- a/README.md.tpl
+++ b/README.md.tpl
@@ -44,7 +44,6 @@ OpenLLM supports a wide range of state-of-the-art open-source LLMs. You can also
{%- endfor %}
-...
For the full model list, see the [OpenLLM models repository](https://github.com/bentoml/openllm-models).
diff --git a/gen_readme.py b/gen_readme.py
index 8e360078b..964492aba 100644
--- a/gen_readme.py
+++ b/gen_readme.py
@@ -5,18 +5,6 @@
# "uv",
# ]
# ///
+import subprocess, sys, pathlib, json, jinja2
-import subprocess, sys, pathlib, json
-
-from jinja2 import Environment, FileSystemLoader
-
-wd = pathlib.Path('.').parent
-model_dict = subprocess.run(
- [sys.executable, '-m', 'uv', 'run', '--with-editable', '.', 'openllm', 'model', 'list', '--output', 'readme'],
- capture_output=True,
- text=True,
- check=True,
-)
-E = Environment(loader=FileSystemLoader('.'))
-with (wd / 'README.md').open('w') as f:
- f.write(E.get_template('README.md.tpl').render(model_dict=json.loads(model_dict.stdout.strip())))
+with (pathlib.Path('.').parent / 'README.md').open('w') as f: f.write(jinja2.Environment(loader=jinja2.FileSystemLoader('.')).get_template('README.md.tpl').render(model_dict=json.loads(subprocess.run([sys.executable, '-m', 'uv', 'run', '--with-editable', '.', 'openllm', 'model', 'list', '--output', 'readme'], text=True, check=True, capture_output=True).stdout.strip())))
diff --git a/uv.lock b/uv.lock
index 7dd74c400..b71126d8e 100644
--- a/uv.lock
+++ b/uv.lock
@@ -1191,7 +1191,7 @@ wheels = [
[[package]]
name = "openai"
-version = "1.61.1"
+version = "1.63.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "anyio" },
@@ -1203,9 +1203,9 @@ dependencies = [
{ name = "tqdm" },
{ name = "typing-extensions" },
]
-sdist = { url = "https://files.pythonhosted.org/packages/d9/cf/61e71ce64cf0a38f029da0f9a5f10c9fa0e69a7a977b537126dac50adfea/openai-1.61.1.tar.gz", hash = "sha256:ce1851507218209961f89f3520e06726c0aa7d0512386f0f977e3ac3e4f2472e", size = 350784 }
+sdist = { url = "https://files.pythonhosted.org/packages/4f/32/2049e973a646801df425aecdf88c6504ca878bdb3951fe12076fc30f2977/openai-1.63.0.tar.gz", hash = "sha256:597d7a1b35b113e5a09fcb953bdb1eef44f404a39985f3d7573b3ab09221fd66", size = 356710 }
wheels = [
- { url = "https://files.pythonhosted.org/packages/9a/b6/2e2a011b2dc27a6711376808b4cd8c922c476ea0f1420b39892117fa8563/openai-1.61.1-py3-none-any.whl", hash = "sha256:72b0826240ce26026ac2cd17951691f046e5be82ad122d20a8e1b30ca18bd11e", size = 463126 },
+ { url = "https://files.pythonhosted.org/packages/67/a0/e1fe4e87218639fc0a0927da5266c2978eaa0e2eb5437479ee64a11535bb/openai-1.63.0-py3-none-any.whl", hash = "sha256:a664dfc78f0a05ca46c3e21f344f840cf6bf7174f13cfa9de214ed28bfca1dda", size = 472282 },
]
[[package]]
@@ -1233,7 +1233,7 @@ requires-dist = [
{ name = "dulwich" },
{ name = "huggingface-hub" },
{ name = "nvidia-ml-py" },
- { name = "openai", specifier = "==1.61.1" },
+ { name = "openai", specifier = "==1.63.0" },
{ name = "pathlib" },
{ name = "pip-requirements-parser" },
{ name = "psutil" },