2 changes: 2 additions & 0 deletions vllm/Miner-U/README.md
@@ -53,3 +53,5 @@ mineru-gradio --server-name 0.0.0.0 --server-port 7860
```

Refer to [here](https://opendatalab.github.io/MinerU/zh/usage/quick_usage/#_2) for more details.

### Refer to [here](https://github.com/intel/llm-scaler/tree/main/vllm#243-mineru-26-support) for the new mineru-vllm version 2.6.1, which includes performance improvements.
60 changes: 50 additions & 10 deletions vllm/README.md
@@ -2278,16 +2278,9 @@ TORCH_LLM_ALLREDUCE=1 VLLM_USE_V1=1 CCL_ZE_IPC_EXCHANGE=pidfd VLLM_ALLOW_LONG_M

---

### 2.4.3 MinerU 2.5 Support
### 2.4.3 MinerU 2.6 Support

This guide shows how to launch the MinerU 2.5 model using the vLLM inference backend.

#### Install MinerU Core

First, install the core MinerU package:
```bash
pip install mineru[core]
```
This guide shows how to launch the MinerU 2.6 model using the vLLM inference backend.

#### Start the MinerU Service

@@ -2307,7 +2300,10 @@ python3 -m vllm.entrypoints.openai.api_server \
--trust-remote-code \
--gpu-memory-util 0.85 \
--no-enable-prefix-caching \
--max-num-batched-tokens=32768 \
--max-model-len=32768 \
--block-size 64 \
--max-num-seqs 256 \
--served-model-name MinerU \
--tensor-parallel-size 1 \
--pipeline-parallel-size 1 \
@@ -2328,6 +2324,31 @@ To verify mineru
mineru -p /llm/MinerU/demo/pdfs/small_ocr.pdf -o ./ -b vlm-http-client -u http://127.0.0.1:8000
```
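
Before running the client, it can help to confirm that the endpoint is actually up and serving the expected model. This sanity check is not part of the original guide; it uses `/v1/models`, the standard model-listing endpoint of vLLM's OpenAI-compatible server, and assumes the default port from the launch command above:

```bash
# The response should include the served model name ("MinerU" per the launch flags)
curl -s http://127.0.0.1:8000/v1/models
```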

2. Using Gradio

```bash
mineru-gradio --server-name 0.0.0.0 --server-port 8002
```
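
Since the Python client below will fail if the app is still starting up, you can optionally wait for the port to come up first (a minimal sketch, assuming the default port 8002 from the command above):

```bash
# Poll until the Gradio app on port 8002 answers HTTP requests
until curl -s -o /dev/null http://localhost:8002/; do
  sleep 1
done
```

Once it is up, the `/to_markdown` endpoint can be driven from Python with `gradio_client`: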

```python
from gradio_client import Client, handle_file

# Connect to the mineru-gradio app started above
client = Client("http://localhost:8002/")

# Convert a PDF to markdown, using the vLLM server as the vlm-http-client backend
result = client.predict(
    file_path=handle_file('/llm/MinerU/demo/pdfs/small_ocr.pdf'),
    end_pages=500,
    is_ocr=False,
    formula_enable=True,
    table_enable=True,
    language="ch",
    backend="vlm-http-client",
    url="http://localhost:8000",
    api_name="/to_markdown"
)
print(result)
```
For more details, refer to Gradio's [API guide](http://your_ip:8002/?view=api) (replace `your_ip` with the address of the host running the app).

---

### 2.5 Omni Model Support
@@ -2362,7 +2383,7 @@ python3 -m vllm.entrypoints.openai.api_server \

After starting the vLLM service, you can follow the link below to use it:

#### [Qwen2.5-Omni input](https://github.com/QwenLM/Qwen2.5-Omni?tab=readme-ov-file#vllm-serve-usage)
#### [Qwen-Omni input](https://github.com/QwenLM/Qwen2.5-Omni?tab=readme-ov-file#vllm-serve-usage)

```bash
curl http://localhost:8000/v1/chat/completions \
@@ -2383,6 +2404,25 @@ An example response is listed below:
```json
{"id":"chatcmpl-xxx","object":"chat.completion","model":"Qwen2.5-Omni-7B","choices":[{"index":0,"message":{"role":"assistant","reasoning_content":null,"content":"The text in the image is \"TONGYI Qwen\". The sound in the audio is a cough.","tool_calls":[]},"logprobs":null,"finish_reason":"stop","stop_reason":null}],"usage":{"prompt_tokens":156,"total_tokens":180,"completion_tokens":24,"prompt_tokens_details":null},"prompt_logprobs":null,"kv_transfer_params":null}
```
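
To print only the assistant's text from a response like the one above, the reply can be piped through `jq` (a convenience sketch; `REQUEST_BODY` is a placeholder for the JSON payload of the request, and `jq` must be installed):

```bash
# Extract just the assistant message content from the completion
curl -s http://localhost:8000/v1/chat/completions \
  -H "Content-Type: application/json" \
  -d "$REQUEST_BODY" | jq -r '.choices[0].message.content'
```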

For video input, you can send a request like this:

```bash
curl -sS http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
"model": "Qwen3-Omni-30B-A3B-Instruct",
"temperature": 0,
"max_tokens": 1024,
"messages": [{
"role": "user",
"content": [
{ "type": "text", "text": "Please describe the video comprehensively as much as possible." },
{ "type": "video_url", "video_url": { "url": "https://raw.githubusercontent.com/EvolvingLMMs-Lab/sglang/dev/onevision_local/assets/jobs.mp4" } }
]
}]
}'
```


---

### 2.6 Data Parallelism (DP)
18 changes: 9 additions & 9 deletions vllm/docker/Dockerfile
@@ -16,7 +16,6 @@ RUN wget -O- https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRO
RUN apt-get update -y && \
# apt-get install -y software-properties-common && \
# add-apt-repository ppa:deadsnakes/ppa && \
apt-get update -y && \
apt-get install -y python3.12 python3.12-dev python3-pip && \
update-alternatives --install /usr/bin/python3 python3 /usr/bin/python3.12 1 && \
update-alternatives --install /usr/bin/python python /usr/bin/python3.12 1 && \
Expand All @@ -34,7 +33,6 @@ RUN apt-get update -y && \
vim \
linux-libc-dev && \
# Install Intel GPU runtime packages
apt-get update -y && \
apt-get install -y intel-oneapi-dpcpp-ct=2025.1.0-452 && \
apt-get clean && rm -rf /var/lib/apt/lists/*

@@ -51,20 +49,20 @@ ENV VLLM_WORKER_MULTIPROC_METHOD=spawn
RUN python3 -m pip config set global.break-system-packages true

# Clone + patch vllm
RUN git clone -b v0.10.0 https://github.com/vllm-project/vllm.git && \
RUN git clone -b v0.10.2 https://github.com/vllm-project/vllm.git && \
cd vllm && \
git apply /tmp/vllm_for_multi_arc.patch && \
pip install --no-cache-dir -r requirements/xpu.txt && \
export CPATH=/opt/intel/oneapi/dpcpp-ct/2025.1/include/:${CPATH} && \
python3 setup.py install

# Clone + patch miner-U
RUN git clone https://github.com/opendatalab/MinerU.git && \
RUN git clone -b release-2.6.2 https://github.com/opendatalab/MinerU.git && \
cd MinerU && \
git checkout de41fa58590263e43b783fe224b6d07cae290a33 && \
git apply /tmp/miner-u.patch && \
pip install -e .[core] && \
sed -i 's/select_device(self.args.device, verbose=verbose)/torch.device(self.args.device)/' /usr/local/lib/python3.12/dist-packages/ultralytics/engine/predictor.py
pip install -e .[core] --no-deps && \
pip install mineru_vl_utils==0.1.14 gradio gradio-client gradio-pdf && \
sed -i 's/kwargs.get("max_concurrency", 100)/kwargs.get("max_concurrency", 200)/' /llm/MinerU/mineru/backend/vlm/vlm_analyze.py && \
sed -i 's/kwargs.get("http_timeout", 600)/kwargs.get("http_timeout", 1200)/' /llm/MinerU/mineru/backend/vlm/vlm_analyze.py


# ======= Add oneCCL build =======
@@ -120,7 +118,9 @@ RUN pip install accelerate hf_transfer 'modelscope!=1.15.0'


# Pin transformers version to avoid conflict in vLLM
RUN pip install "transformers<4.54.0"
RUN pip install "transformers==4.57.0" && \
pip install librosa soundfile && \
pip install mineru[core]==2.5.4


# Set additional environment for production usage
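
With these Dockerfile changes, the image can be rebuilt in the usual way. This is a sketch only: the tag and build context are assumptions, and the patch files the Dockerfile copies from `/tmp` must be available in the build context.

```bash
# Rebuild the image with vLLM v0.10.2 and MinerU release-2.6.2
docker build -f vllm/docker/Dockerfile -t llm-scaler-vllm:local vllm/docker
```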