Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions vllm/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ RUN apt-get update -y && \

WORKDIR /llm
COPY ./patches/vllm_for_multi_arc.patch /tmp/
COPY ./patches/miner-u.patch /tmp/

# Set environment variables early
ENV LD_LIBRARY_PATH="$LD_LIBRARY_PATH:/usr/local/lib/"
Expand All @@ -57,6 +58,15 @@ RUN git clone -b v0.10.0 https://github.com/vllm-project/vllm.git && \
export CPATH=/opt/intel/oneapi/dpcpp-ct/2025.1/include/:${CPATH} && \
python3 setup.py install

# Clone MinerU at a pinned commit, apply the local patch, install it editable with the "core" extra (quoted so the shell cannot glob-expand the brackets), then rewrite the ultralytics predictor to build a torch.device directly instead of calling select_device() (presumably so non-CUDA device strings such as "xpu" are accepted - confirm). NOTE: the sed target path is tied to Python 3.12 dist-packages.
RUN git clone https://github.com/opendatalab/MinerU.git && \
cd MinerU && \
git checkout de41fa58590263e43b783fe224b6d07cae290a33 && \
git apply /tmp/miner-u.patch && \
pip install -e ".[core]" && \
sed -i 's/select_device(self.args.device, verbose=verbose)/torch.device(self.args.device)/' /usr/local/lib/python3.12/dist-packages/ultralytics/engine/predictor.py


# ======= Add oneCCL build =======
# RUN apt-get update && apt-get install -y \
# cmake \
Expand Down
50 changes: 50 additions & 0 deletions vllm/patches/miner-u.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
diff --git a/mineru/backend/pipeline/pipeline_analyze.py b/mineru/backend/pipeline/pipeline_analyze.py
index de933059..6c421595 100644
--- a/mineru/backend/pipeline/pipeline_analyze.py
+++ b/mineru/backend/pipeline/pipeline_analyze.py
@@ -125,7 +125,7 @@ def doc_analyze(
f'Batch {index + 1}/{len(batch_images)}: '
f'{processed_images_count} pages/{len(images_with_extra_info)} pages'
)
- batch_results = batch_image_analyze(batch_image, formula_enable, table_enable)
+ batch_results = batch_image_analyze(batch_image, formula_enable, table_enable, len(images_with_extra_info))
results.extend(batch_results)

# 构建返回结果
@@ -149,7 +149,9 @@ def doc_analyze(
def batch_image_analyze(
images_with_extra_info: List[Tuple[PIL.Image.Image, bool, str]],
formula_enable=True,
- table_enable=True):
+ table_enable=True,
+ paths=0,
+ ):
# os.environ['CUDA_VISIBLE_DEVICES'] = str(idx)

from .batch_analyze import BatchAnalyze
@@ -198,6 +200,12 @@ def batch_image_analyze(
else:
enable_ocr_det_batch = True

+ batch_ratio = 16
+ min_path = int(os.getenv('MIN_ENABLE_OCR_DET_BATCH_PATH', 20))
+ if paths >= min_path:
+ enable_ocr_det_batch = True
+ print(f"enable_ocr_det_batch: {enable_ocr_det_batch}")
+
batch_model = BatchAnalyze(model_manager, batch_ratio, formula_enable, table_enable, enable_ocr_det_batch)
results = batch_model(images_with_extra_info)

diff --git a/mineru/utils/config_reader.py b/mineru/utils/config_reader.py
index f6d013ea..85a70ede 100644
--- a/mineru/utils/config_reader.py
+++ b/mineru/utils/config_reader.py
@@ -79,6 +79,8 @@ def get_device():
else:
if torch.cuda.is_available():
return "cuda"
+ elif hasattr(torch, "xpu") and torch.xpu.is_available():
+ return "xpu"
elif torch.backends.mps.is_available():
return "mps"
else: