In [1]:
from pathlib import Path
from paddleocr import PaddleOCRVL

# Tunable settings; adjust as needed.
# NOTE(review): the output recorded for this cell ends in
# "RuntimeError: Unsupported GPU architecture" (the log reports a PaddlePaddle
# build that does not match the GPU). Presumably either installing a matching
# wheel, as the log suggests, or setting DEVICE to "cpu" avoids it — confirm.
DEVICE = "gpu:0"   # change to "cpu" if no GPU is available
USE_LAYOUT_DETECTION = True      # whether to enable layout detection
FORMAT_BLOCK_CONTENT = True      # whether to format block_content as Markdown

# Build one global pipeline here; later cells use it directly.
pipeline = PaddleOCRVL(
    device=DEVICE,
    use_layout_detection=USE_LAYOUT_DETECTION,
    format_block_content=FORMAT_BLOCK_CONTENT,
)

print("PaddleOCR-VL 初始化完成")


  from .autonotebook import tqdm as notebook_tqdm
[32mCreating model: ('PP-DocLayoutV2', None)[0m
[32mModel files already exist. Using cached files. To redownload, please delete the directory manually: `/home/amishor/.paddlex/official_models/PP-DocLayoutV2`.[0m
E1207 01:08:32.084872   645 gpu_resources.cc:98] Mismatched GPU Architecture: The installed PaddlePaddle package was compiled for 61 70 75 80 86 89 90 ,but your current GPU is 120 Solution: Install the correct wheel package built for your GPU from the official PaddlePaddle website: https://www.paddlepaddle.org.cn/
  result_str.replace("\(", " $ ")


RuntimeError: Unsupported GPU architecture

In [None]:
import json

def _ordered_text_blocks(json_res: dict) -> list:
    """Return the non-empty contents of blocks labelled ``"text"``, in list order.

    ``parsing_res_list`` is read from the top level of ``json_res`` when present.
    Otherwise, if ``json_res`` carries a dict under ``"res"``, the list is read
    from there (presumably the layout PaddleX-style results use — verify against
    the installed version). A missing or ``None`` list yields ``[]``.
    """
    payload = json_res
    if "parsing_res_list" not in json_res and isinstance(json_res.get("res"), dict):
        payload = json_res["res"]

    blocks = payload.get("parsing_res_list") or []
    return [
        block.get("block_content", "")
        for block in blocks
        if block.get("block_label", "") == "text" and block.get("block_content", "")
    ]


def parse_image_with_vl(
    image_path: str,
    out_dir: str = "./output",
    save_files: bool = True,
):
    """
    Parse one image (PNG/JPG, ...) with the global ``pipeline`` and return the results.

    Args:
        image_path: Path of the image to parse.
        out_dir: Directory for saved artefacts. It is created if missing, even
            when ``save_files`` is False.
        save_files: When True, write the JSON, Markdown and visualisation files
            plus a ``<stem>_plain.txt`` file holding the "text" blocks.

    Returns:
        Whatever ``pipeline.predict`` returned (iterated here as a sequence of
        result objects exposing ``print``, ``json`` and the ``save_to_*`` methods).

    Raises:
        FileNotFoundError: If ``image_path`` is not an existing file.
    """
    img_path = Path(image_path)
    if not img_path.is_file():
        raise FileNotFoundError(f"Image not found: {img_path}")

    out_dir = Path(out_dir)
    out_dir.mkdir(parents=True, exist_ok=True)

    # 1. Run the VL model on the image.
    outputs = pipeline.predict(input=str(img_path))

    # 2. A single image normally gives one result, but handle it as a sequence.
    for idx, res in enumerate(outputs):
        print(f"\n===== Image #{idx} 解析结果（简要打印） =====")
        # JSON-style dump so the output can be folded in the notebook.
        res.print(format_json=True, indent=2, ensure_ascii=False)

        if not save_files:
            continue

        # 2.1-2.3 JSON, Markdown and the visualisation image.
        res.save_to_json(save_path=str(out_dir))
        res.save_to_markdown(save_path=str(out_dir))
        res.save_to_img(save_path=str(out_dir))

        # 2.4 Extra plain-text file with the "text" blocks in list order.
        # NOTE(review): the file name depends only on the image stem, so if
        # several results are produced each one overwrites the previous file.
        ordered_texts = _ordered_text_blocks(res.json)
        plain_text_path = out_dir / f"{img_path.stem}_plain.txt"
        with plain_text_path.open("w", encoding="utf-8") as f:
            for text in ordered_texts:
                f.write(text)
                f.write("\n\n")

        # The JSON/Markdown names below are what this notebook assumes the
        # save_to_* calls produce — TODO confirm against the library.
        print(f"\n文件已保存到：{out_dir.resolve()}")
        print(f"- JSON: {img_path.stem}_res.json")
        print(f"- Markdown: {img_path.stem}.md")
        # Fixed: this was `print(f("..."))`, which called the closed file handle.
        print(f"- 纯文本: {img_path.stem}_plain.txt")

    return outputs


In [None]:
# Replace this with the path to your own PNG.
test_image = "./demo.png"   # e.g. ./data/page1.png

results = parse_image_with_vl(
    image_path=test_image,
    out_dir="./vl_output",
    save_files=True,   # set to False if you only want to view results in the notebook
)
