From 5a49010a99306560713f9a849fdd190defced41a Mon Sep 17 00:00:00 2001
From: "Wang, Jian4" <61138589+hzjane@users.noreply.github.com>
Date: Wed, 29 Oct 2025 14:49:19 +0800
Subject: [PATCH 1/4] Update readme (#137)

* add known issue

* update mineru usage

* update_readme
---
 vllm/KNOWN_ISSUES.md | 4 ++--
 vllm/README.md       | 9 +++++----
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/vllm/KNOWN_ISSUES.md b/vllm/KNOWN_ISSUES.md
index f9567d7..9c147e1 100644
--- a/vllm/KNOWN_ISSUES.md
+++ b/vllm/KNOWN_ISSUES.md
@@ -12,9 +12,9 @@ Workaround:
 Change the PCIe slot configuration in BIOS from Auto/x16 to x8/x8. With this change, over 40 GB/s bi-directional P2P bandwidth can be achieved. Root cause analysis is still in progress.
 
-# 03. Container OOM killed by using `--enable-auto-tool-choice` and starting container not by /bin/bash and not run `source /opt/intel/oneapi/setvars.sh`
+# 03. Container OOM killed (and vLLM performance drop) when the container is not started via /bin/bash and `source /opt/intel/oneapi/setvars.sh` is not run
 
-When using `--enable-auto-tool-choice` and deploy container by docker-compose without `source /opt/intel/oneapi/setvars.sh`, the LD_LIBRARY_PATH will be different and cause the container OOM. It can be reproduced by this two command:
+When using `--enable-auto-tool-choice` and deploying the container via docker-compose without `source /opt/intel/oneapi/setvars.sh`, LD_LIBRARY_PATH is set differently, which causes the container to be OOM killed (or its performance to drop). This can be reproduced with these two commands:
 
 ```bash
 docker run --rm --entrypoint "/bin/bash" --name=test intel/llm-scaler-vllm:latest -c env | grep LD_LIBRARY_PATH
diff --git a/vllm/README.md b/vllm/README.md
index 549dcd7..f81ab0c 100644
--- a/vllm/README.md
+++ b/vllm/README.md
@@ -2177,6 +2177,8 @@ curl http://localhost:8000/v1/chat/completions \
     "max_tokens": 128
   }'
 ```
+
+If you want the server to process an image from its local filesystem, you can pass `"url": "file:/llm/models/test/1.jpg"` to test.
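+
+For example, a request that reads the image from the server's local filesystem might look like the following (a minimal sketch: the model name and image path are placeholders, and depending on your vLLM version the server may also need to be started with `--allowed-local-media-path` covering that directory):
+
+```bash
+curl http://localhost:8000/v1/chat/completions \
+  -H "Content-Type: application/json" \
+  -d '{
+    "model": "your-served-model-name",
+    "messages": [{
+      "role": "user",
+      "content": [
+        {"type": "text", "text": "Describe this image."},
+        {"type": "image_url", "image_url": {"url": "file:/llm/models/test/1.jpg"}}
+      ]
+    }],
+    "max_tokens": 128
+  }'
+```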
 
 ---
 
 ### 2.4.1 Audio Model Support [Deprecated]
@@ -2319,12 +2321,11 @@ python3 -m vllm.entrypoints.openai.api_server \
 
 #### Run the demo
 
-To verify your setup, clone the official MinerU repository and run the demo script:
+To verify MinerU:
 
 ```bash
-git clone https://github.com/opendatalab/MinerU.git
-cd MinerU/demo
-python3 demo.py
+# mineru -p <input_path> -o <output_dir> -b vlm-http-client -u http://127.0.0.1:8000
+mineru -p /llm/MinerU/demo/pdfs/small_ocr.pdf -o ./ -b vlm-http-client -u http://127.0.0.1:8000
 ```
 
 ---

From 64bec5a9c9bc10ae14e4ae956861dea25f0af222 Mon Sep 17 00:00:00 2001
From: hzjane
Date: Thu, 30 Oct 2025 15:27:20 +0800
Subject: [PATCH 2/4] update mineru version

---
 vllm/docker/Dockerfile | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/vllm/docker/Dockerfile b/vllm/docker/Dockerfile
index e388e79..dbb8e2a 100644
--- a/vllm/docker/Dockerfile
+++ b/vllm/docker/Dockerfile
@@ -57,12 +57,12 @@ RUN git clone -b v0.10.2 https://github.com/vllm-project/vllm.git && \
     python3 setup.py install
 
 # Clone + patch miner-U
-RUN git clone https://github.com/opendatalab/MinerU.git && \
+RUN git clone -b release-2.6.2 https://github.com/opendatalab/MinerU.git && \
     cd MinerU && \
-    git checkout de41fa58590263e43b783fe224b6d07cae290a33 && \
-    git apply /tmp/miner-u.patch && \
-    pip install -e .[core] && \
-    sed -i 's/select_device(self.args.device, verbose=verbose)/torch.device(self.args.device)/' /usr/local/lib/python3.12/dist-packages/ultralytics/engine/predictor.py
+    pip install -e .[core] --no-deps && \
+    pip install mineru_vl_utils==0.1.14 && \
+    sed -i 's/max_concurrency: int = 100,/max_concurrency: int = 200,/' /usr/local/lib/python3.12/dist-packages/mineru_vl_utils/mineru_client.py && \
+    sed -i 's/http_timeout: int = 600,/http_timeout: int = 1200,/' /usr/local/lib/python3.12/dist-packages/mineru_vl_utils/mineru_client.py
 
 
 # ======= Add oneCCL build =======

From 2ea45a68e90c070c9cb03fb89e9747d7e7882b4b Mon Sep 17 00:00:00 2001
From: hzjane
Date: Thu, 30 Oct 2025 15:28:17 +0800
Subject: [PATCH 3/4] update readme

---
 vllm/Miner-U/README.md |  2 ++
 vllm/README.md         | 40 +++++++++++++++++++++++++++++-----------
 2 files changed, 31 insertions(+), 11 deletions(-)

diff --git a/vllm/Miner-U/README.md b/vllm/Miner-U/README.md
index 79e2c86..d79bb03 100644
--- a/vllm/Miner-U/README.md
+++ b/vllm/Miner-U/README.md
@@ -53,3 +53,5 @@ mineru-gradio --server-name 0.0.0.0 --server-port 7860
 ```
 
 Refer to [here](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#_2) for more details.
+
+### Refer to [here](https://github.com/intel/llm-scaler/tree/main/vllm#243-mineru-26-support) for the new MinerU 2.6 vLLM support, which brings performance improvements.
diff --git a/vllm/README.md b/vllm/README.md
index f49d8a5..6a0693a 100644
--- a/vllm/README.md
+++ b/vllm/README.md
@@ -2278,16 +2278,9 @@ TORCH_LLM_ALLREDUCE=1 VLLM_USE_V1=1 CCL_ZE_IPC_EXCHANGE=pidfd VLLM_ALLOW_LONG_M
 
 ---
 
-### 2.4.3 MinerU 2.5 Support
+### 2.4.3 MinerU 2.6 Support
 
-This guide shows how to launch the MinerU 2.5 model using the vLLM inference backend.
-
-#### Install MinerU Core
-
-First, install the core MinerU package:
-```bash
-pip install mineru[core]
-```
+This guide shows how to launch the MinerU 2.6 model using the vLLM inference backend.
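+
+Once the service below is running, a quick sanity check (a sketch; it assumes the default port 8000 used throughout this section) is to query the OpenAI-compatible model list before pointing MinerU at the endpoint:
+
+```bash
+# The response should list the model registered via --served-model-name
+# ("MinerU" in the launch command below).
+curl -s http://localhost:8000/v1/models
+```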
 
 #### Start the MinerU Service
 
@@ -2320,14 +2313,39 @@ python3 -m vllm.entrypoints.openai.api_server \
 
 
-#### Run the demo
-To verify MinerU:
+#### How to use MinerU
+1. To verify MinerU from the command line:
 
 ```bash
 # mineru -p <input_path> -o <output_dir> -b vlm-http-client -u http://127.0.0.1:8000
 mineru -p /llm/MinerU/demo/pdfs/small_ocr.pdf -o ./ -b vlm-http-client -u http://127.0.0.1:8000
 ```
 
+2. To use MinerU through Gradio:
+
+```bash
+mineru-gradio --server-name 0.0.0.0 --server-port 8002
+```
+
+```python
+from gradio_client import Client, handle_file
+
+client = Client("http://localhost:8002/")
+result = client.predict(
+    file_path=handle_file('/llm/MinerU/demo/pdfs/small_ocr.pdf'),
+    end_pages=500,
+    is_ocr=False,
+    formula_enable=True,
+    table_enable=True,
+    language="ch",
+    backend="vlm-http-client",
+    url="http://localhost:8000",
+    api_name="/to_markdown"
+)
+print(result)
+```
+
+For more details, refer to Gradio's [API guide](http://your_ip:8002/?view=api).
+
 ---
 
 ### 2.5 Omni Model Support

From 7845a3837b4fda5d59267d97f5156c3958764abd Mon Sep 17 00:00:00 2001
From: hzjane
Date: Thu, 30 Oct 2025 16:26:34 +0800
Subject: [PATCH 4/4] update

---
 vllm/README.md         | 3 +++
 vllm/docker/Dockerfile | 6 +++---
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/vllm/README.md b/vllm/README.md
index 6a0693a..fa2aeac 100644
--- a/vllm/README.md
+++ b/vllm/README.md
@@ -2300,7 +2300,10 @@
     --trust-remote-code \
     --gpu-memory-util 0.85 \
     --no-enable-prefix-caching \
+    --max-num-batched-tokens=32768 \
+    --max-model-len=32768 \
     --block-size 64 \
+    --max-num-seqs 256 \
     --served-model-name MinerU \
     --tensor-parallel-size 1 \
     --pipeline-parallel-size 1 \
diff --git a/vllm/docker/Dockerfile b/vllm/docker/Dockerfile
index dbb8e2a..2018d05 100644
--- a/vllm/docker/Dockerfile
+++ b/vllm/docker/Dockerfile
@@ -60,9 +60,9 @@ RUN git clone -b v0.10.2 https://github.com/vllm-project/vllm.git && \
 RUN git clone -b release-2.6.2 https://github.com/opendatalab/MinerU.git && \
     cd MinerU && \
     pip install -e .[core] --no-deps && \
-    pip install mineru_vl_utils==0.1.14 && \
-    sed -i 's/max_concurrency: int = 100,/max_concurrency: int = 200,/' /usr/local/lib/python3.12/dist-packages/mineru_vl_utils/mineru_client.py && \
-    sed -i 's/http_timeout: int = 600,/http_timeout: int = 1200,/' /usr/local/lib/python3.12/dist-packages/mineru_vl_utils/mineru_client.py
+    pip install mineru_vl_utils==0.1.14 gradio gradio-client gradio-pdf && \
+    sed -i 's/kwargs.get("max_concurrency", 100)/kwargs.get("max_concurrency", 200)/' /llm/MinerU/mineru/backend/vlm/vlm_analyze.py && \
+    sed -i 's/kwargs.get("http_timeout", 600)/kwargs.get("http_timeout", 1200)/' /llm/MinerU/mineru/backend/vlm/vlm_analyze.py
 
 
 # ======= Add oneCCL build =======
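
After building the image, one way to confirm that the `sed` edits above actually landed is to grep the patched file inside the container (a sketch; the image tag is an example and may differ for your build):

```bash
# Expect the defaults to read 200 and 1200 after the Dockerfile sed edits.
docker run --rm --entrypoint /bin/bash intel/llm-scaler-vllm:latest -c \
  "grep -nE 'max_concurrency|http_timeout' /llm/MinerU/mineru/backend/vlm/vlm_analyze.py"
```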