In [10]:
from docling.datamodel.base_models import InputFormat
from docling.document_converter import DocumentConverter, PdfFormatOption, ImageFormatOption
from docling.pipeline.vlm_pipeline import VlmPipeline
from docling.datamodel.pipeline_options import (
    VlmPipelineOptions,
    PdfPipelineOptions
)
from docling.datamodel.pipeline_options import (TesseractOcrOptions, EasyOcrOptions)
from docling.datamodel import vlm_model_specs

# Registry of the VLM presets exercised by this notebook. Each entry is a
# (spec object, label) pair; the label is the attribute name on
# `vlm_model_specs` and is reused by test_vlm to name the output file.
vlm_models = [
    (getattr(vlm_model_specs, spec_name), spec_name)
    for spec_name in (
        "SMOLDOCLING_MLX",
        "SMOLDOCLING_TRANSFORMERS",
        "GRANITE_VISION_TRANSFORMERS",
        "GRANITE_VISION_OLLAMA",
        "PIXTRAL_12B_TRANSFORMERS",
        "PIXTRAL_12B_MLX",
        "PHI4_TRANSFORMERS",
        "QWEN25_VL_3B_MLX",
        "GEMMA3_12B_MLX",
        "GEMMA3_27B_MLX",
    )
]

def test_vlm(source, model):
    """Convert ``source`` with a VLM pipeline and save the Markdown export.

    Args:
        source: Document to convert; passed unchanged to
            ``DocumentConverter.convert``.
        model: ``(vlm_options, label)`` pair as stored in ``vlm_models``.
            ``model[0]`` configures the VLM pipeline and ``model[1]`` is
            embedded in the output file name.

    Returns:
        None. The result is written to ``markdown_output_<label>.txt`` in the
        current working directory, overwriting any existing file.
    """
    vlm_options, label = model[0], model[1]
    pipeline_options = VlmPipelineOptions(vlm_options=vlm_options)

    # PDFs and images are both routed through the same VLM pipeline settings.
    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(
                pipeline_cls=VlmPipeline,
                pipeline_options=pipeline_options,
            ),
            InputFormat.IMAGE: ImageFormatOption(
                pipeline_cls=VlmPipeline,
                pipeline_options=pipeline_options,
            ),
        }
    )

    markdown = converter.convert(source=source).document.export_to_markdown()

    # Pin the encoding: without it the file is written in the platform locale
    # encoding, which can fail on (or mangle) non-ASCII characters in the
    # exported Markdown.
    with open(f"markdown_output_{label}.txt", "w", encoding="utf-8") as f:
        f.write(markdown)
    

def test_ocr(source, use_easyocr=True):
    """Convert ``source`` with the OCR-enabled pipeline and save the Markdown export.

    Args:
        source: Document to convert; passed unchanged to
            ``DocumentConverter.convert``.
        use_easyocr: When true, ``EasyOcrOptions`` is used as the OCR
            configuration; otherwise ``TesseractOcrOptions`` is used.

    Returns:
        None. The result is written to ``markdown_output_easyocr.txt`` or
        ``markdown_output_tesseract.txt`` in the current working directory,
        overwriting any existing file.
    """
    pipeline_options = PdfPipelineOptions()
    pipeline_options.do_ocr = True
    pipeline_options.do_table_structure = True
    pipeline_options.table_structure_options.do_cell_matching = True
    pipeline_options.ocr_options = (
        EasyOcrOptions() if use_easyocr else TesseractOcrOptions()
    )

    # PDFs and images share the same OCR pipeline settings.
    converter = DocumentConverter(
        format_options={
            InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
            InputFormat.IMAGE: ImageFormatOption(pipeline_options=pipeline_options),
        }
    )

    markdown = converter.convert(source=source).document.export_to_markdown()

    engine_label = "easyocr" if use_easyocr else "tesseract"
    # Pin the encoding: without it the file is written in the platform locale
    # encoding, which can fail on (or mangle) non-ASCII characters in the
    # exported Markdown.
    with open(f"markdown_output_{engine_label}.txt", "w", encoding="utf-8") as f:
        f.write(markdown)



In [None]:
# Sample input shared by both OCR runs below.
source = "sample_invoice_1.png"
# Run the same image through test_ocr once per engine flag: True selects
# EasyOcrOptions and False selects TesseractOcrOptions. Each call writes its
# own markdown_output_<engine>.txt file, so the two results can be compared.
test_ocr(source, use_easyocr=True)
test_ocr(source, use_easyocr=False)

In [13]:
# NOTE(review): the saved output of this cell ends with KeyboardInterrupt, so
# the run was presumably stopped before an output file was written -- confirm.
source = "sample_invoice_1.png"
test_vlm(source, vlm_models[2])  # Index 2 is the third entry of vlm_models: GRANITE_VISION_TRANSFORMERS

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor wit

KeyboardInterrupt: 