From 23050ef92e99bb8ea3541a8fead40781845996b7 Mon Sep 17 00:00:00 2001
From: hzjane
Date: Wed, 10 Sep 2025 15:20:10 +0800
Subject: [PATCH 1/2] enable miner-u

---
Review notes (text in this section is ignored by `git am`):
  * This series was restored from a copy whose newlines had been collapsed.
    Leading whitespace and blank context lines were reconstructed from the
    hunk line counts; run `git apply --check` before relying on it.
  * The miner-u.patch hunk at -198 originally added a second, identical
    `batch_model = BatchAnalyze(...)` line directly above the existing one.
    That duplicate (and its two padding blank lines) is dropped, so the hunk
    and diffstat counts are 3 lines smaller than in the original submission
    and the blob ids on the affected `index` lines are stale.

 vllm/docker/Dockerfile     | 10 ++++++++
 vllm/patches/miner-u.patch | 50 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)
 create mode 100644 vllm/patches/miner-u.patch

diff --git a/vllm/docker/Dockerfile b/vllm/docker/Dockerfile
index 3657ad3..b389124 100644
--- a/vllm/docker/Dockerfile
+++ b/vllm/docker/Dockerfile
@@ -40,6 +40,7 @@ RUN apt-get update -y && \
 
 WORKDIR /llm
 COPY ./patches/vllm_for_multi_arc.patch /tmp/
+COPY ./patches/miner-u.patch /tmp/
 
 # Set environment variables early
 ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/"
@@ -57,6 +58,15 @@ RUN git clone -b v0.10.0 https://github.com/vllm-project/vllm.git && \
     export CPATH=/opt/intel/oneapi/dpcpp-ct/2025.1/include/:${CPATH} && \
     python3 setup.py install
 
+# Clone + patch miner-U
+RUN git clone https://github.com/opendatalab/MinerU.git && \
+    git checkout de41fa58590263e43b783fe224b6d07cae290a33 && \
+    cd MinerU && \
+    git apply /tmp/miner-u.patch && \
+    pip install -e .[core] && \
+    sed -i 's/select_device(self.args.device, verbose=verbose)/torch.device(self.args.device)/' /usr/local/lib/python3.12/dist-packages/ultralytics/engine/predictor.py
+
+
 # ======= Add oneCCL build =======
 # RUN apt-get update && apt-get install -y \
 #     cmake \
diff --git a/vllm/patches/miner-u.patch b/vllm/patches/miner-u.patch
new file mode 100644
index 0000000..0331614
--- /dev/null
+++ b/vllm/patches/miner-u.patch
@@ -0,0 +1,50 @@
+diff --git a/mineru/backend/pipeline/pipeline_analyze.py b/mineru/backend/pipeline/pipeline_analyze.py
+index de933059..6c421595 100644
+--- a/mineru/backend/pipeline/pipeline_analyze.py
++++ b/mineru/backend/pipeline/pipeline_analyze.py
+@@ -125,7 +125,7 @@ def doc_analyze(
+             f'Batch {index + 1}/{len(batch_images)}: '
+             f'{processed_images_count} pages/{len(images_with_extra_info)} pages'
+         )
+-        batch_results = batch_image_analyze(batch_image, formula_enable, table_enable)
++        batch_results = batch_image_analyze(batch_image, formula_enable, table_enable, len(images_with_extra_info))
+         results.extend(batch_results)
+ 
+     # 构建返回结果
+@@ -149,7 +149,9 @@ def doc_analyze(
+ def batch_image_analyze(
+         images_with_extra_info: List[Tuple[PIL.Image.Image, bool, str]],
+         formula_enable=True,
+-        table_enable=True):
++        table_enable=True,
++        paths=0,
++    ):
+     # os.environ['CUDA_VISIBLE_DEVICES'] = str(idx)
+ 
+     from .batch_analyze import BatchAnalyze
+@@ -198,6 +200,12 @@ def batch_image_analyze(
+     else:
+         enable_ocr_det_batch = True
+ 
++    batch_ratio = 16
++    min_path = int(os.getenv('MIN_ENABLE_OCR_DET_BATCH_PATH', 20))
++    if paths >= min_path:
++        enable_ocr_det_batch = True
++    print(f"enable_ocr_det_batch: {enable_ocr_det_batch}")
++
+     batch_model = BatchAnalyze(model_manager, batch_ratio, formula_enable, table_enable, enable_ocr_det_batch)
+     results = batch_model(images_with_extra_info)
+ 
+diff --git a/mineru/utils/config_reader.py b/mineru/utils/config_reader.py
+index f6d013ea..85a70ede 100644
+--- a/mineru/utils/config_reader.py
++++ b/mineru/utils/config_reader.py
+@@ -79,6 +79,8 @@ def get_device():
+     else:
+         if torch.cuda.is_available():
+             return "cuda"
++        elif torch.xpu.is_available():
++            return "xpu"
+         elif torch.backends.mps.is_available():
+             return "mps"
+         else:
\ No newline at end of file

From 97f520d10e5d59be298ae2c9e39c2ed09092e305 Mon Sep 17 00:00:00 2001
From: hzjane
Date: Wed, 10 Sep 2025 15:25:46 +0800
Subject: [PATCH 2/2] update

---
 vllm/docker/Dockerfile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/vllm/docker/Dockerfile b/vllm/docker/Dockerfile
index b389124..5c460e6 100644
--- a/vllm/docker/Dockerfile
+++ b/vllm/docker/Dockerfile
@@ -60,8 +60,8 @@ RUN git clone -b v0.10.0 https://github.com/vllm-project/vllm.git && \
 
 # Clone + patch miner-U
 RUN git clone https://github.com/opendatalab/MinerU.git && \
-    git checkout de41fa58590263e43b783fe224b6d07cae290a33 && \
     cd MinerU && \
+    git checkout de41fa58590263e43b783fe224b6d07cae290a33 && \
     git apply /tmp/miner-u.patch && \
     pip install -e .[core] && \
     sed -i 's/select_device(self.args.device, verbose=verbose)/torch.device(self.args.device)/' /usr/local/lib/python3.12/dist-packages/ultralytics/engine/predictor.py