diff --git a/docling_eval/cli/main.py b/docling_eval/cli/main.py
index d9a4ffac..e1b566f7 100644
--- a/docling_eval/cli/main.py
+++ b/docling_eval/cli/main.py
@@ -73,6 +73,13 @@
 
 # Configure logging
 logging.getLogger("docling").setLevel(logging.WARNING)
+logging.getLogger("PIL").setLevel(logging.WARNING)
+logging.getLogger("transformers").setLevel(logging.WARNING)
+logging.getLogger("datasets").setLevel(logging.WARNING)
+logging.getLogger("filelock").setLevel(logging.WARNING)
+logging.getLogger("urllib3").setLevel(logging.WARNING)
+logging.getLogger("docling_ibm_models").setLevel(logging.WARNING)
+
 _log = logging.getLogger(__name__)
 
 app = typer.Typer(
@@ -188,14 +195,17 @@ def get_dataset_builder(
             name="CVAT", dataset_source=dataset_source, target=target, split=split
         )
     elif benchmark == BenchMarkNames.PLAIN_FILES:
-        assert dataset_source is not None
+        if dataset_source is None:
+            raise ValueError("dataset_source is required for PLAIN_FILES")
+
         return FileDatasetBuilder(
             name=dataset_source.name,
             dataset_source=dataset_source,
             target=target,
             split=split,
+            begin_index=begin_index,
+            end_index=end_index,
         )
-
     else:
         raise ValueError(f"Unsupported benchmark: {benchmark}")
 
@@ -209,7 +219,11 @@ def get_prediction_provider(
 ):
     pipeline_options: PaginatedPipelineOptions
     """Get the appropriate prediction provider with default settings."""
-    if provider_type == PredictionProviderType.DOCLING:
+    if (
+        provider_type == PredictionProviderType.DOCLING
+        or provider_type == PredictionProviderType.OCR_DOCLING
+        or provider_type == PredictionProviderType.EasyOCR_DOCLING
+    ):
         ocr_factory = get_ocr_factory()
 
         ocr_options: OcrOptions = ocr_factory.create_options(  # type: ignore
@@ -238,6 +252,78 @@ def get_prediction_provider(
             ignore_missing_predictions=True,
         )
 
+    elif provider_type == PredictionProviderType.MacOCR_DOCLING:
+        ocr_factory = get_ocr_factory()
+
+        ocr_options: OcrOptions = ocr_factory.create_options(  # type: ignore
+            kind="ocrmac",
+        )
+
+        pipeline_options = PdfPipelineOptions(
+            do_ocr=True,
+            ocr_options=ocr_options,
+            do_table_structure=True,
+        )
+
+        pipeline_options.images_scale = 2.0
+        pipeline_options.generate_page_images = True
+        pipeline_options.generate_picture_images = True
+
+        if artifacts_path is not None:
+            pipeline_options.artifacts_path = artifacts_path
+
+        return DoclingPredictionProvider(
+            format_options={
+                InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options),
+                InputFormat.IMAGE: PdfFormatOption(pipeline_options=pipeline_options),
+            },
+            do_visualization=do_visualization,
+            ignore_missing_predictions=True,
+        )
+
+    elif provider_type == PredictionProviderType.PDF_DOCLING:
+
+        ocr_factory = get_ocr_factory()
+
+        ocr_options: OcrOptions = ocr_factory.create_options(  # type: ignore
+            kind="easyocr",
+        )
+
+        pdf_pipeline_options = PdfPipelineOptions(
+            do_ocr=False,
+            ocr_options=ocr_options,  # we need to provide OCR options in order to not break the parquet serialization
+            do_table_structure=True,
+        )
+
+        pdf_pipeline_options.images_scale = 2.0
+        pdf_pipeline_options.generate_page_images = True
+        pdf_pipeline_options.generate_picture_images = True
+
+        ocr_pipeline_options = PdfPipelineOptions(
+            do_ocr=True,
+            ocr_options=ocr_options,  # used for OCR on images (also needed in order to not break the parquet serialization)
+            do_table_structure=True,
+        )
+
+        ocr_pipeline_options.images_scale = 2.0
+        ocr_pipeline_options.generate_page_images = True
+        ocr_pipeline_options.generate_picture_images = True
+
+        if artifacts_path is not None:
+            pdf_pipeline_options.artifacts_path = artifacts_path
+            ocr_pipeline_options.artifacts_path = artifacts_path
+
+        return DoclingPredictionProvider(
+            format_options={
+                InputFormat.PDF: PdfFormatOption(pipeline_options=pdf_pipeline_options),
+                InputFormat.IMAGE: PdfFormatOption(
+                    pipeline_options=ocr_pipeline_options
+                ),
+            },
+            do_visualization=do_visualization,
+            ignore_missing_predictions=True,
+        )
+
     elif provider_type == PredictionProviderType.SMOLDOCLING:
         pipeline_options = VlmPipelineOptions()
 
@@ -614,9 +700,14 @@ def create_cvat(
     output_dir: Annotated[Path, typer.Option(help="Output directory")],
     gt_dir: Annotated[Path, typer.Option(help="Dataset source path")],
     bucket_size: Annotated[int, typer.Option(help="Size of CVAT tasks")] = 20,
+    use_predictions: Annotated[bool, typer.Option(help="use predictions")] = False,
 ):
+    """Create dataset ready to upload to CVAT starting from (ground-truth) dataset."""
     builder = CvatPreannotationBuilder(
-        dataset_source=gt_dir, target=output_dir, bucket_size=bucket_size
+        dataset_source=gt_dir,
+        target=output_dir,
+        bucket_size=bucket_size,
+        use_predictions=use_predictions,
     )
     builder.prepare_for_annotation()
 
diff --git a/docling_eval/datamodels/types.py b/docling_eval/datamodels/types.py
index 93a5011f..120ee414 100644
--- a/docling_eval/datamodels/types.py
+++ b/docling_eval/datamodels/types.py
@@ -118,6 +118,11 @@ class PredictionProviderType(str, Enum):
     """Types of prediction providers available."""
 
     DOCLING = "Docling"
+    PDF_DOCLING = "PDF_Docling"
+    OCR_DOCLING = "OCR_Docling"
+    MacOCR_DOCLING = "MacOCR_Docling"
+    EasyOCR_DOCLING = "EasyOCR_Docling"
+
     TABLEFORMER = "TableFormer"
     FILE = "File"
     SMOLDOCLING = "SmolDocling"
diff --git a/docling_eval/dataset_builders/dataset_builder.py b/docling_eval/dataset_builders/dataset_builder.py
index a3b96a5e..012c95fd 100644
--- a/docling_eval/dataset_builders/dataset_builder.py
+++ b/docling_eval/dataset_builders/dataset_builder.py
@@ -7,6 +7,7 @@
 
 import ibm_boto3  # type: ignore
 from docling.utils.utils import chunkify
+from docling_core.types.doc.document import ImageRefMode
 from huggingface_hub import snapshot_download
 from pydantic import BaseModel
 
@@ -15,7 +16,6 @@
     TRUE_HTML_EXPORT_LABELS,
 )
 from docling_eval.utils.utils import save_shard_to_disk, write_datasets_info
-from docling_eval.visualisation.visualisations import save_inspection_html
 
 # Get logger
 _log = logging.getLogger(__name__)
@@ -276,10 +276,11 @@ def save_to_disk(
                 record_list.append(r.as_record_dict())
                 if do_visualization:
                     viz_path = self.target / "visualizations" / f"{r.doc_id}.html"
-                    save_inspection_html(
+                    r.ground_truth_doc.save_as_html(
                         filename=viz_path,
-                        doc=r.ground_truth_doc,
                         labels=TRUE_HTML_EXPORT_LABELS,
+                        image_mode=ImageRefMode.EMBEDDED,
+                        split_page_view=True,
                     )
 
             save_shard_to_disk(
diff --git a/docling_eval/dataset_builders/file_dataset_builder.py b/docling_eval/dataset_builders/file_dataset_builder.py
index d32a9b4e..111bd19f 100644
--- a/docling_eval/dataset_builders/file_dataset_builder.py
+++ b/docling_eval/dataset_builders/file_dataset_builder.py
@@ -100,7 +100,7 @@ def iterate(self) -> Iterable[DatasetRecord]:
 
         for filename in tqdm(
             selected_filenames,
-            desc="Processing files for DP-Bench",
+            desc=f"Processing files for {self.name}",
             ncols=128,
         ):
             mime_type, _ = mimetypes.guess_type(filename)
@@ -108,6 +108,7 @@ def iterate(self) -> Iterable[DatasetRecord]:
             # Create the ground truth Document
             true_doc = DoclingDocument(name=f"{filename}")
             if mime_type == "application/pdf":
+                _log.info(f"add_pages_to_true_doc: {filename}")
                 true_doc, _ = add_pages_to_true_doc(
                     pdf_path=filename, true_doc=true_doc, image_scale=2.0
                 )
@@ -126,6 +127,7 @@ def iterate(self) -> Iterable[DatasetRecord]:
                     image=image_ref,
                 )
 
+                _log.info(f"add_pages_to_true_doc: {filename}")
                 true_doc.pages[1] = page_item
             else:
                 raise ValueError(
@@ -139,18 +141,20 @@ def iterate(self) -> Iterable[DatasetRecord]:
                 page_images_column=BenchMarkColumns.GROUNDTRUTH_PAGE_IMAGES.value,
             )
 
-            # Get PDF as binary data
-            pdf_bytes = get_binary(filename)
-            pdf_stream = DocumentStream(name=filename.name, stream=BytesIO(pdf_bytes))
+            # Get source as binary data
+            source_bytes = get_binary(filename)
+            source_stream = DocumentStream(
+                name=filename.name, stream=BytesIO(source_bytes)
+            )
 
             # Create dataset record
             record = DatasetRecord(
                 doc_id=str(filename.name),
-                doc_hash=get_binhash(pdf_bytes),
+                doc_hash=get_binhash(source_bytes),
                 ground_truth_doc=true_doc,
                 ground_truth_pictures=true_pictures,
                 ground_truth_page_images=true_page_images,
-                original=pdf_stream,
+                original=source_stream,
                 mime_type=mime_type,
             )
 
diff --git a/docling_eval/evaluators/readingorder_evaluator.py b/docling_eval/evaluators/readingorder_evaluator.py
index 3c3fa364..01c64fb5 100644
--- a/docling_eval/evaluators/readingorder_evaluator.py
+++ b/docling_eval/evaluators/readingorder_evaluator.py
@@ -292,7 +292,7 @@ def _show_items(self, true_doc: DoclingDocument):
             )
             text = item.text if isinstance(item, TextItem) else None
             label = item.label  # type: ignore
-            print(f"True {i}: {level} - {label}: {bbox} - {text}")
+            # print(f"True {i}: {level} - {label}: {bbox} - {text}")
 
 
 class ReadingOrderVisualizer:
diff --git a/docling_eval/prediction_providers/base_prediction_provider.py b/docling_eval/prediction_providers/base_prediction_provider.py
index 996cafdd..d280ed98 100644
--- a/docling_eval/prediction_providers/base_prediction_provider.py
+++ b/docling_eval/prediction_providers/base_prediction_provider.py
@@ -165,7 +165,6 @@ def visualize_results(
                 / f"{prediction_record.doc_id}.html",
                 true_doc=gt_doc,
                 pred_doc=pred_doc,
-                page_image=prediction_record.ground_truth_page_images[0],
                 true_labels=self.true_labels,
                 pred_labels=self.pred_labels,
                 draw_reading_order=True,
diff --git a/docling_eval/prediction_providers/docling_provider.py b/docling_eval/prediction_providers/docling_provider.py
index 880e58b4..b86b619b 100644
--- a/docling_eval/prediction_providers/docling_provider.py
+++ b/docling_eval/prediction_providers/docling_provider.py
@@ -106,7 +106,7 @@ def predict(self, record: DatasetRecord) -> DatasetRecordWithPrediction:
     def info(self) -> Dict:
         """Get information about the prediction provider."""
 
-        return {
+        result = {
             "asset": PredictionProviderType.DOCLING,
             "version": docling_version(),
             "package_versions": {
@@ -128,10 +128,11 @@ def info(self) -> Dict:
                             mode="json", exclude_defaults=True
                         )
                         if v.pipeline_options is not None
-                        else {}
+                        else None  # Parquet might not like empty dicts!
                     ),
                 }
                 for k, v in self.doc_converter.format_to_options.items()
                 if k in [InputFormat.PDF, InputFormat.IMAGE]
             },
         }
+        return result
diff --git a/docling_eval/visualisation/constants.py b/docling_eval/visualisation/constants.py
index a0a04e3c..823c258a 100644
--- a/docling_eval/visualisation/constants.py
+++ b/docling_eval/visualisation/constants.py
@@ -309,7 +309,7 @@
             display: flex;
             flex-direction: column;
             width: 25%; /* Adjust the width of each item */
-            height: 100%; /* Adjust height to fill parent container */
+            height: 50%; /* Adjust height to fill parent container */
             border: 1px solid #ccc; /* Optional: Add borders */
             box-shadow: 2px 2px 5px rgba(0, 0, 0, 0.1); /* Optional: Add shadow */
             background-color: #fff; /* Optional: Add background */
diff --git a/docling_eval/visualisation/visualisations.py b/docling_eval/visualisation/visualisations.py
index 330c634a..f782fca6 100644
--- a/docling_eval/visualisation/visualisations.py
+++ b/docling_eval/visualisation/visualisations.py
@@ -1,10 +1,16 @@
 import copy
 import logging
+import re
 from pathlib import Path
 from typing import Set
 
 from docling.datamodel.base_models import BoundingBox, Cluster
 from docling.utils.visualization import draw_clusters
+from docling_core.experimental.serializer.html import (
+    HTMLDocSerializer,
+    HTMLOutputStyle,
+    HTMLParams,
+)
 from docling_core.types.doc.document import (
     ContentLayer,
     DocItem,
@@ -23,54 +29,6 @@
 )
 
 
-def save_comparison_html(
-    filename: Path,
-    true_doc: DoclingDocument,
-    pred_doc: DoclingDocument,
-    page_image: Image.Image,
-    true_labels: Set[DocItemLabel],
-    pred_labels: Set[DocItemLabel],
-):
-
-    true_doc_html = true_doc.export_to_html(
-        image_mode=ImageRefMode.EMBEDDED,
-        html_head=HTML_DEFAULT_HEAD_FOR_COMP,
-        labels=true_labels,
-    )
-
-    pred_doc_html = pred_doc.export_to_html(
-        image_mode=ImageRefMode.EMBEDDED,
-        html_head=HTML_DEFAULT_HEAD_FOR_COMP,
-        labels=pred_labels,
-    )
-
-    # since the string in srcdoc are wrapped by ', we need to replace all ' by it HTML convention
-    true_doc_html = true_doc_html.replace("'", "&#39;")
-    pred_doc_html = pred_doc_html.replace("'", "&#39;")
-
-    image_base64 = from_pil_to_base64(page_image)
-
-    """
-    # Convert the image to a bytes object
-    buffered = io.BytesIO()
-    page_image.save(
-        buffered, format="PNG"
-    )  # Specify the format (e.g., JPEG, PNG, etc.)
-    image_bytes = buffered.getvalue()
-
-    # Encode the bytes to a Base64 string
-    image_base64 = base64.b64encode(image_bytes).decode("utf-8")
-    """
-
-    comparison_page = copy.deepcopy(HTML_COMPARISON_PAGE)
-    comparison_page = comparison_page.replace("BASE64PAGE", image_base64)
-    comparison_page = comparison_page.replace("TRUEDOC", true_doc_html)
-    comparison_page = comparison_page.replace("PREDDOC", pred_doc_html)
-
-    with open(str(filename), "w") as fw:
-        fw.write(comparison_page)
-
-
 def draw_arrow(
     draw: ImageDraw.ImageDraw,
     arrow_coords: tuple[float, float, float, float],
@@ -117,296 +75,151 @@ def draw_arrow(
     return draw
 
 
-def draw_clusters_with_reading_order(
-    doc: DoclingDocument,
-    page_image: Image.Image,
-    labels: Set[DocItemLabel],
-    page_no: int = 1,
-    reading_order: bool = True,
-):
-
-    # img = copy.deepcopy(page_image)
-    img = page_image.copy()
-    draw = ImageDraw.Draw(img)
-
-    # Load a font (adjust the font size and path as needed)
-    font = ImageFont.load_default()
-    try:
-        font = ImageFont.truetype("arial.ttf", size=15)
-    except IOError:
-        font = ImageFont.load_default()
-
-    x0, y0 = None, None
-
-    for item, level in doc.iterate_items(
-        included_content_layers={ContentLayer.BODY, ContentLayer.FURNITURE}
-    ):
-        if isinstance(item, DocItem):  # and item.label in labels:
-            for prov in item.prov:
-
-                if page_no != prov.page_no:
-                    continue
-
-                bbox = prov.bbox.to_top_left_origin(
-                    page_height=doc.pages[prov.page_no].size.height
-                )
-                bbox = bbox.normalized(doc.pages[prov.page_no].size)
-
-                bbox.l = round(bbox.l * img.width)
-                bbox.r = round(bbox.r * img.width)
-                bbox.t = round(bbox.t * img.height)
-                bbox.b = round(bbox.b * img.height)
-
-                if bbox.b > bbox.t:
-                    bbox.b, bbox.t = bbox.t, bbox.b
-
-                if not reading_order:
-                    x0, y0 = None, None
-                elif x0 is None and y0 is None:
-                    x0 = (bbox.l + bbox.r) / 2.0
-                    y0 = (bbox.b + bbox.t) / 2.0
-                else:
-                    assert x0 is not None
-                    assert y0 is not None
-
-                    x1 = (bbox.l + bbox.r) / 2.0
-                    y1 = (bbox.b + bbox.t) / 2.0
-
-                    # Arrow parameters
-                    start_point = (x0, y0)  # Starting point of the arrow
-                    end_point = (x1, y1)  # Ending point of the arrow
-                    arrowhead_length = 20  # Length of the arrowhead
-                    arrowhead_width = 10  # Width of the arrowhead
-
-                    arrow_color = "red"
-                    line_width = 2
-
-                    # Draw the arrow shaft (line)
-                    draw.line(
-                        [start_point, end_point], fill=arrow_color, width=line_width
-                    )
-
-                    # Calculate the arrowhead points
-                    dx = end_point[0] - start_point[0]
-                    dy = end_point[1] - start_point[1]
-                    angle = (dx**2 + dy**2) ** 0.5 + 0.01  # Length of the arrow shaft
-
-                    # Normalized direction vector for the arrow shaft
-                    ux, uy = dx / angle, dy / angle
-
-                    # Base of the arrowhead
-                    base_x = end_point[0] - ux * arrowhead_length
-                    base_y = end_point[1] - uy * arrowhead_length
-
-                    # Left and right points of the arrowhead
-                    left_x = base_x - uy * arrowhead_width
-                    left_y = base_y + ux * arrowhead_width
-                    right_x = base_x + uy * arrowhead_width
-                    right_y = base_y - ux * arrowhead_width
-
-                    # Draw the arrowhead (triangle)
-                    draw.polygon(
-                        [end_point, (left_x, left_y), (right_x, right_y)],
-                        fill=arrow_color,
-                    )
-
-                    x0, y0 = x1, y1
-
-                # Draw rectangle with only a border
-                rectangle_color = "blue"
-                border_width = 1
-                draw.rectangle(
-                    [bbox.l, bbox.b, bbox.r, bbox.t],
-                    outline=rectangle_color,
-                    width=border_width,
-                )
-
-                # Calculate label size using getbbox
-                text_bbox = font.getbbox(str(item.label))
-                label_width = text_bbox[2] - text_bbox[0]
-                label_height = text_bbox[3] - text_bbox[1]
-                label_x = bbox.l
-                label_y = (
-                    bbox.b - label_height
-                )  # - 5  # Place the label above the rectangle
-
-                # Draw label text
-                draw.text(
-                    (label_x, label_y),
-                    str(item.label),
-                    fill=rectangle_color,
-                    font=font,
-                )
-
-    return img
-
-
 def save_comparison_html_with_clusters(
     filename: Path,
     true_doc: DoclingDocument,
     pred_doc: DoclingDocument,
-    page_image: Image.Image,
     true_labels: Set[DocItemLabel],
     pred_labels: Set[DocItemLabel],
     draw_reading_order: bool = True,
 ):
-    if (1 not in true_doc.pages) or (1 not in pred_doc.pages):
-        logging.error(f"1 not in true_doc.pages -> skipping {filename} ")
-        return
-
-    def draw_doc_layout(doc: DoclingDocument, image: Image.Image):
-        r"""
-        Draw the document clusters and optionaly the reading order
-        """
-        clusters = []
-        for idx, (elem, _) in enumerate(
-            doc.iterate_items(
-                included_content_layers={ContentLayer.BODY, ContentLayer.FURNITURE}
-            )
-        ):
-            if not isinstance(elem, DocItem):
-                continue
-            if len(elem.prov) == 0:
-                continue  # Skip elements without provenances
-            prov = elem.prov[0]
-
-            if prov.page_no not in true_doc.pages or prov.page_no != 1:
-                logging.error(f"{prov.page_no} not in true_doc.pages -> skipping! ")
-                continue
-
-            tlo_bbox = prov.bbox.to_top_left_origin(
-                page_height=doc.pages[prov.page_no].size.height
-            )
-            cluster = Cluster(
-                id=idx,
-                label=elem.label,
-                bbox=BoundingBox.model_validate(tlo_bbox),
-                cells=[],
-            )
-            clusters.append(cluster)
-
-        scale_x = image.width / doc.pages[1].size.width
-        scale_y = image.height / doc.pages[1].size.height
-        draw_clusters(image, clusters, scale_x, scale_y)
+    """Save comparison html with clusters."""
 
-        return image
+    def get_missing_pageimg(width=800, height=1100, text="MISSING PAGE"):
+        """Get missing page image."""
+        import numpy as np
+        from PIL import Image, ImageDraw, ImageFont
 
-    def draw_doc_reading_order(doc: DoclingDocument, image: Image.Image):
-        r"""
-        Draw the reading order
-        """
+        # Create a white background image
+        image = Image.new("RGB", (width, height), color="white")
         draw = ImageDraw.Draw(image)
-        x0, y0 = None, None
-
-        for elem, _ in doc.iterate_items(
-            included_content_layers={ContentLayer.BODY, ContentLayer.FURNITURE}
-        ):
-            if not isinstance(elem, DocItem):
-                continue
-            if len(elem.prov) == 0:
-                continue  # Skip elements without provenances
-            prov = elem.prov[0]
-
-            if prov.page_no not in true_doc.pages or prov.page_no != 1:
-                logging.error(f"{prov.page_no} not in true_doc.pages -> skipping! ")
-                continue
-
-            tlo_bbox = prov.bbox.to_top_left_origin(
-                page_height=doc.pages[prov.page_no].size.height
-            )
-            ro_bbox = tlo_bbox.normalized(doc.pages[prov.page_no].size)
-            ro_bbox.l = round(ro_bbox.l * image.width)
-            ro_bbox.r = round(ro_bbox.r * image.width)
-            ro_bbox.t = round(ro_bbox.t * image.height)
-            ro_bbox.b = round(ro_bbox.b * image.height)
-
-            if ro_bbox.b > ro_bbox.t:
-                ro_bbox.b, ro_bbox.t = ro_bbox.t, ro_bbox.b
-
-            if x0 is None and y0 is None:
-                x0 = (ro_bbox.l + ro_bbox.r) / 2.0
-                y0 = (ro_bbox.b + ro_bbox.t) / 2.0
-            else:
-                assert x0 is not None
-                assert y0 is not None
-
-                x1 = (ro_bbox.l + ro_bbox.r) / 2.0
-                y1 = (ro_bbox.b + ro_bbox.t) / 2.0
-
-                draw = draw_arrow(
-                    draw,
-                    (x0, y0, x1, y1),
-                    line_width=2,
-                    color="red",
-                )
-                x0, y0 = x1, y1
+
+        # Try to use a standard font or fall back to default
+        try:
+            # For larger installations, you might have Arial or other fonts
+            font = ImageFont.truetype("arial.ttf", size=60)
+        except IOError:
+            # Fall back to default font
+            font = ImageFont.load_default().font_variant(size=60)
+
+        # Get text size to center it
+        text_width, text_height = (
+            draw.textsize(text, font=font)
+            if hasattr(draw, "textsize")
+            else (draw.textlength(text, font=font), font.size)
+        )
+
+        # Position for the text (centered; the diagonal angle comes from rotating the image below)
+        position = ((width - text_width) // 2, (height - text_height) // 2)
+
+        # Draw the watermark text in light gray (it is rotated together with the image below)
+        draw.text(position, text, fill=(200, 200, 200), font=font)
+
+        # Rotate the image 45 degrees to create diagonal watermark effect
+        image = image.rotate(45, expand=False, fillcolor="white")
+
         return image
 
-    # HTML rendering
-    true_doc_html = true_doc.export_to_html(
-        image_mode=ImageRefMode.EMBEDDED,
-        html_head=HTML_DEFAULT_HEAD_FOR_COMP,
-        labels=true_labels,
-    )
+    true_page_imgs = true_doc.get_visualization(show_label=False)
+    pred_page_imgs = pred_doc.get_visualization(show_label=False)
 
-    pred_doc_html = pred_doc.export_to_html(
-        image_mode=ImageRefMode.EMBEDDED,
-        html_head=HTML_DEFAULT_HEAD_FOR_COMP,
-        labels=pred_labels,
+    true_page_nos = true_page_imgs.keys()
+    pred_page_nos = pred_page_imgs.keys()
+
+    if true_page_nos != pred_page_nos:
+        logging.error(
+            f"incompatible true_page_nos versus pred_page_nos: \ntrue_page_nos: {true_page_nos}\npred_page_nos: {pred_page_nos}"
+        )
+
+    page_nos = true_page_nos | pred_page_nos
+
+    html_parts = [
+        "<!DOCTYPE html>",
+        "<html>",
+        HTML_DEFAULT_HEAD_FOR_COMP,
+        "<body>",
+    ]
+
+    html_parts.append("<table>")
+    html_parts.append("<tbody>")
+
+    # Compile a regular expression that captures the content of the <div class='page'> wrapper inside <body>
+    pattern = re.compile(
+        r"<body[^>]*>\n<div class='page'>(.*?)</div>\n</body>",
+        re.DOTALL | re.IGNORECASE,
     )
 
-    # since the string in srcdoc are wrapped by ', we need to replace all ' by it HTML convention
-    true_doc_html = true_doc_html.replace("'", "&#39;")
-    pred_doc_html = pred_doc_html.replace("'", "&#39;")
+    for page_no in page_nos:
 
-    true_doc_img = draw_doc_layout(true_doc, copy.deepcopy(page_image))
-    pred_doc_img = draw_doc_layout(pred_doc, copy.deepcopy(page_image))
+        if page_no in true_page_imgs:
+            true_doc_img_b64 = from_pil_to_base64(true_page_imgs[page_no])
+        else:
+            logging.error(f"{page_no} not in true_page_imgs, get default image.")
+            true_doc_img_b64 = from_pil_to_base64(get_missing_pageimg())
 
-    if draw_reading_order:
-        true_doc_img = draw_doc_reading_order(true_doc, true_doc_img)
-        pred_doc_img = draw_doc_reading_order(pred_doc, pred_doc_img)
+        if page_no in pred_page_imgs:
+            pred_doc_img_b64 = from_pil_to_base64(pred_page_imgs[page_no])
+        else:
+            logging.error(f"{page_no} not in pred_page_imgs, get default image.")
+            pred_doc_img_b64 = from_pil_to_base64(get_missing_pageimg())
 
-    true_doc_img_b64 = from_pil_to_base64(true_doc_img)
-    pred_doc_img_b64 = from_pil_to_base64(pred_doc_img)
+        true_doc_page = true_doc.export_to_html(
+            image_mode=ImageRefMode.EMBEDDED, page_no=page_no
+        )
+        pred_doc_page = pred_doc.export_to_html(
+            image_mode=ImageRefMode.EMBEDDED, page_no=page_no
+        )
 
-    comparison_page = copy.deepcopy(HTML_COMPARISON_PAGE_WITH_CLUSTERS)
-    comparison_page = comparison_page.replace("BASE64TRUEPAGE", true_doc_img_b64)
-    comparison_page = comparison_page.replace("TRUEDOC", true_doc_html)
-    comparison_page = comparison_page.replace("BASE64PREDPAGE", pred_doc_img_b64)
-    comparison_page = comparison_page.replace("PREDDOC", pred_doc_html)
+        # Search for the pattern in the HTML string
+        mtch = pattern.search(true_doc_page)
+        if mtch:
+            true_doc_page_body = mtch.group(1).strip()
+        else:
+            logging.error(f"could not find body in true_doc_page")
+            true_doc_page_body = "<p>Nothing Found</p>"
 
-    with open(str(filename), "w") as fw:
-        fw.write(comparison_page)
+        # Search for the pattern in the HTML string
+        mtch = pattern.search(pred_doc_page)
+        if mtch:
+            pred_doc_page_body = mtch.group(1).strip()
+        else:
+            logging.error(f"could not find body in pred_doc_page")
+            pred_doc_page_body = "<p>Nothing Found</p>"
 
+        if len(true_doc_page_body) == 0:
+            true_doc_page_body = "<p>Nothing Found</p>"
 
-def save_inspection_html(
-    filename: Path, doc: DoclingDocument, labels: Set[DocItemLabel]
-):
+        if len(pred_doc_page_body) == 0:
+            pred_doc_page_body = "<p>Nothing Found</p>"
+
+        html_parts.append("<tr>")
+
+        html_parts.append("<td>")
+        html_parts.append(f'<img src="data:image/png;base64,{true_doc_img_b64}">')
+        html_parts.append("</td>")
+
+        html_parts.append("<td>")
+        html_parts.append(f"<div class='page'>\n{true_doc_page_body}\n</div>")
+        html_parts.append("</td>")
 
-    html_doc = doc.export_to_html(image_mode=ImageRefMode.EMBEDDED, labels=labels)
-    html_doc = html_doc.replace("'", "&#39;")
+        html_parts.append("<td>")
+        html_parts.append(f'<img src="data:image/png;base64,{pred_doc_img_b64}">')
+        html_parts.append("</td>")
 
-    page_images = []
-    page_template = '<div class="image-wrapper"><img src="data:image/png;base64,BASE64PAGE" alt="Example Image"></div>'
-    for page_no, page in doc.pages.items():
-        # page_img = page.image.pil_image
+        html_parts.append("<td>")
+        html_parts.append(f"<div class='page'>\n{pred_doc_page_body}\n</div>")
+        html_parts.append("</td>")
 
-        if page.image is not None and page.image.pil_image is not None:
+        html_parts.append("</tr>")
 
-            page_img = draw_clusters_with_reading_order(
-                doc=doc,
-                page_image=page.image.pil_image,
-                labels=labels,
-                page_no=page_no,
-                reading_order=True,
-            )
+    html_parts.append("</tbody>")
+    html_parts.append("</table>")
 
-            page_base64 = from_pil_to_base64(page_img)
-            page_images.append(page_template.replace("BASE64PAGE", page_base64))
+    # Close HTML structure
+    html_parts.extend(["</body>", "</html>"])
 
-    html_viz = copy.deepcopy(HTML_INSPECTION)
-    html_viz = html_viz.replace("PREDDOC", html_doc)
-    html_viz = html_viz.replace("PAGE_IMAGES", "\n".join(page_images))
+    # Join with newlines
+    html_content = "\n".join(html_parts)
 
     with open(str(filename), "w") as fw:
-        fw.write(html_viz)
+        fw.write(html_content)
diff --git a/docs/CVAT_create_groundtruth.md b/docs/CVAT_create_groundtruth.md
index e0e9925e..20d3d074 100644
--- a/docs/CVAT_create_groundtruth.md
+++ b/docs/CVAT_create_groundtruth.md
@@ -17,6 +17,12 @@ Alternatively, you can first create a plain dataset from a folder of PDF or imag
 docling_eval create-gt --benchmark PlainFiles --dataset-source ./tests/data/files --output-dir ./benchmarks/my_dataset/
 ```
 
+If you want to pre-annotate the dataset (e.g. with Docling), use:
+
+```shell
+docling_eval create --benchmark PlainFiles --dataset-source ./tests/data/files --output-dir ./benchmarks/my_dataset/ --prediction-provider Docling
+```
+
 ## Pre-annotation with Docling
 
 Now you can create the files you need to upload to [CVAT](https://www.cvat.ai/). These files will be created using the `create-cvat` function of the `docling_eval` CLI.
@@ -26,19 +32,21 @@ Now you can create the files you need to upload to [CVAT](https://www.cvat.ai/).
                                                                                                                                                                     
  Usage: docling_eval create-cvat [OPTIONS]                                                                                                                          
                                                                                                                                                                     
-╭─ Options ────────────────────────────────────────────────────────────────────────╮
-│ *  --output-dir         PATH     Output directory [default: None] [required]     │
-│ *  --gt-dir             PATH     Dataset source path [default: None] [required]  │
-│    --bucket-size        INTEGER  Size of CVAT tasks [default: 20]                │
-│    --help                        Show this message and exit.                     │
-╰──────────────────────────────────────────────────────────────────────────────────╯
+╭─ Options ────────────────────────────────────────────────────────────────────────────────────────╮
+│ *  --output-dir         PATH                     Output directory [default: None] [required]     │
+│ *  --gt-dir             PATH                     Dataset source path [default: None] [required]  │
+│    --bucket-size        INTEGER                  Size of CVAT tasks [default: 20]                │
+│    --use-predictions    --no-use-predictions     use predictions [default: no-use-predictions]   │
+│    --help                                        Show this message and exit.                     │
+╰──────────────────────────────────────────────────────────────────────────────────────────────────╯
 ```
 
 For example, try:
 ```sh
 docling_eval create-cvat \ 
    --gt-dir ./benchmarks/DPBench-GT/gt_dataset/test/ \
-   --output-dir ./benchmarks/DPBench-CVAT/
+   --output-dir ./benchmarks/DPBench-CVAT/ \
+   --no-use-predictions
 ```
 
 In essence, this will read the parquet files and set up a new directory structure (designated output) that has the following layout,
diff --git a/docs/examples/package_pdfs.py b/docs/examples/package_pdfs.py
index 0db3041b..2be54849 100644
--- a/docs/examples/package_pdfs.py
+++ b/docs/examples/package_pdfs.py
@@ -17,7 +17,6 @@
     save_shard_to_disk,
 )
 from docling_eval.visualisation.constants import HTML_INSPECTION
-from docling_eval.visualisation.visualisations import draw_clusters_with_reading_order
 
 # Configure logging
 logging.basicConfig(
@@ -164,43 +163,27 @@ def main():
         for key, item in conv_results.timings.items():
             timings[key] = json.loads(item.model_dump_json())
 
-        html_doc = pred_doc.export_to_html(
+        filename = viz_dir / f"{os.path.basename(pdf_file)}.html"
+        pred_doc.save_as_html(
+            filename=filename,
+            labels=PRED_HTML_EXPORT_LABELS,
             image_mode=ImageRefMode.EMBEDDED,
-            # html_head=HTML_DEFAULT_HEAD_FOR_COMP,
-            # labels=pred_labels,
+            split_page_view=True,
         )
 
-        html_doc = html_doc.replace("'", "&#39;")
-
-        page_images = []
-        page_template = '<div class="image-wrapper"><img src="data:image/png;base64,BASE64PAGE" alt="Example Image"></div>'
-        for page_no, page in pred_doc.pages.items():
-            page_img = page.image.pil_image
-
-            page_img = draw_clusters_with_reading_order(
-                doc=pred_doc,
-                page_image=page_img,
-                labels=PRED_HTML_EXPORT_LABELS,
-                page_no=page_no,
-                reading_order=True,
-            )
-
-            page_base64 = from_pil_to_base64(page_img)
-            page_images.append(page_template.replace("BASE64PAGE", page_base64))
-
-        page = copy.deepcopy(HTML_INSPECTION)
-        page = page.replace("PREDDOC", html_doc)
-        page = page.replace("PAGE_IMAGES", "\n".join(page_images))
-
-        filename = viz_dir / f"{os.path.basename(pdf_file)}.html"
-        with open(str(filename), "w") as fw:
-            fw.write(page)
+        pred_doc, pred_pictures, pred_page_images = extract_images(
+            document=pred_doc,
+            pictures_column=BenchMarkColumns.PREDICTION_PICTURES.value,  # pictures_column,
+            page_images_column=BenchMarkColumns.PREDICTION_PAGE_IMAGES.value,  # page_images_column,
+        )
 
         record = {
             BenchMarkColumns.CONVERTER_VERSION: docling_version(),
             BenchMarkColumns.STATUS: str(conv_results.status.value),
             BenchMarkColumns.DOC_ID: str(os.path.basename(pdf_file)),
             BenchMarkColumns.PREDICTION: json.dumps(pred_doc.export_to_dict()),
+            BenchMarkColumns.PREDICTION_PAGE_IMAGES: pred_page_images,
+            BenchMarkColumns.PREDICTION_PICTURES: pred_pictures,
             BenchMarkColumns.ORIGINAL: get_binary(pdf_file),
             BenchMarkColumns.MIMETYPE: "application/pdf",
             BenchMarkColumns.TIMINGS: json.dumps(timings),
diff --git a/docs/examples/package_pngs.py b/docs/examples/package_pngs.py
index b640dbd0..5ab47c22 100644
--- a/docs/examples/package_pngs.py
+++ b/docs/examples/package_pngs.py
@@ -19,7 +19,8 @@
     save_shard_to_disk,
 )
 from docling_eval.visualisation.constants import HTML_INSPECTION
-from docling_eval.visualisation.visualisations import draw_clusters_with_reading_order
+
+# from docling_eval.visualisation.visualisations import draw_clusters_with_reading_order
 
 # Configure logging
 logging.basicConfig(
@@ -173,44 +174,14 @@ def main():
         for key, item in conv_results.timings.items():
             timings[key] = json.loads(item.model_dump_json())
 
-        html_doc = pred_doc.export_to_html(
+        filename = viz_dir / f"{os.path.basename(img_file)}.html"
+        pred_doc.save_as_html(
+            filename=filename,
+            labels=PRED_HTML_EXPORT_LABELS,
             image_mode=ImageRefMode.EMBEDDED,
-            # html_head=HTML_DEFAULT_HEAD_FOR_COMP,
-            # labels=pred_labels,
+            split_page_view=True,
         )
 
-        html_doc = html_doc.replace("'", "&#39;")
-
-        page_images = []
-        page_template = '<div class="image-wrapper"><img src="data:image/png;base64,BASE64PAGE" alt="Example Image"></div>'
-        for page_no, page in pred_doc.pages.items():
-
-            page_img = page.image.pil_image
-
-            # page_img = PILImage.open(png_file)
-            # assert page.size.width==page_img.width, f"page.size.width==page_img.width {page.size.width}=={page_img.width}"
-            # assert page.size.height==page_img.height, f"page.size.height==page_img.height {page.size.height}=={page_img.height}"
-            # page_img.show()
-
-            page_img = draw_clusters_with_reading_order(
-                doc=pred_doc,
-                page_image=page_img,
-                labels=PRED_HTML_EXPORT_LABELS,
-                page_no=page_no,
-                reading_order=True,
-            )
-
-            page_base64 = from_pil_to_base64(page_img)
-            page_images.append(page_template.replace("BASE64PAGE", page_base64))
-
-        page = copy.deepcopy(HTML_INSPECTION)
-        page = page.replace("PREDDOC", html_doc)
-        page = page.replace("PAGE_IMAGES", "\n".join(page_images))
-
-        filename = viz_dir / f"{os.path.basename(img_file)}.html"
-        with open(str(filename), "w") as fw:
-            fw.write(page)
-
         pred_doc, pred_pictures, pred_page_images = extract_images(
             document=pred_doc,
             pictures_column=BenchMarkColumns.PREDICTION_PICTURES.value,  # pictures_column,
diff --git a/poetry.lock b/poetry.lock
index ba2a192d..5be3a7a7 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1532,14 +1532,14 @@ vlm = ["accelerate (>=1.2.1,<2.0.0) ; sys_platform != \"darwin\" or platform_mac
 
 [[package]]
 name = "docling-core"
-version = "2.27.0"
+version = "2.28.0"
 description = "A python library to define and validate data types in Docling."
 optional = false
 python-versions = "<4.0,>=3.9"
 groups = ["main"]
 files = [
-    {file = "docling_core-2.27.0-py3-none-any.whl", hash = "sha256:3bd5ac5e2673b3688c45c9c5beaef9922006c380895933c90393008ae15ed693"},
-    {file = "docling_core-2.27.0.tar.gz", hash = "sha256:b0bee5501a6f7b0a8b888f47ba6e51f82cd559b6afd56bf0c9bce7f71168f87d"},
+    {file = "docling_core-2.28.0-py3-none-any.whl", hash = "sha256:f1a01446996b90c4c151ec0ad247283888e6372f9dac0e356d06f8b9838ca4ca"},
+    {file = "docling_core-2.28.0.tar.gz", hash = "sha256:16a762c251063839d7b20624cd6f89c2488872377f8546b037e604606014fb66"},
 ]
 
 [package.dependencies]
@@ -1550,7 +1550,7 @@ pandas = ">=2.1.4,<3.0.0"
 pillow = ">=10.0.0,<12.0.0"
 pydantic = ">=2.6.0,<2.10.0 || >2.10.0,<2.10.1 || >2.10.1,<2.10.2 || >2.10.2,<3.0.0"
 pyyaml = ">=5.1,<7.0.0"
-semchunk = {version = ">=2.2.0,<3.0.0", optional = true, markers = "extra == \"chunking\""}
+semchunk = {version = ">=2.2.0,<3.0.0", optional = true, markers = "extra == \"chunking\" or extra == \"chunking-openai\""}
 tabulate = ">=0.9.0,<0.10.0"
 transformers = {version = ">=4.34.0,<5.0.0", optional = true, markers = "extra == \"chunking\""}
 typer = ">=0.12.5,<0.16.0"
@@ -1558,6 +1558,7 @@ typing-extensions = ">=4.12.2,<5.0.0"
 
 [package.extras]
 chunking = ["semchunk (>=2.2.0,<3.0.0)", "transformers (>=4.34.0,<5.0.0)"]
+chunking-openai = ["semchunk (>=2.2.0,<3.0.0)", "tiktoken (>=0.9.0,<0.10.0)"]
 
 [[package]]
 name = "docling-ibm-models"
@@ -4294,8 +4295,8 @@ files = [
 numpy = [
     {version = ">=1.21.4", markers = "python_version >= \"3.10\" and platform_system == \"Darwin\""},
     {version = ">=1.21.2", markers = "platform_system != \"Darwin\" and python_version >= \"3.10\""},
-    {version = ">=1.23.5", markers = "python_version >= \"3.11\""},
     {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
+    {version = ">=1.23.5", markers = "python_version >= \"3.11\""},
 ]
 
 [[package]]
@@ -4335,43 +4336,31 @@ groups = ["main"]
 files = [
     {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"},
     {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"},
-    {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"},
     {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"},
-    {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"},
     {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"},
     {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"},
     {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"},
     {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"},
-    {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"},
     {file = "pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"},
-    {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"},
     {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"},
     {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"},
     {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"},
     {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"},
-    {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"},
     {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"},
-    {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"},
     {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"},
     {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"},
     {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"},
     {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"},
-    {file = "pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"},
     {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"},
-    {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"},
     {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"},
     {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"},
     {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"},
     {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"},
-    {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"},
     {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"},
-    {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"},
     {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"},
     {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"},
     {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"},
-    {file = "pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"},
     {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"},
-    {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"},
     {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"},
     {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"},
     {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"},
@@ -4380,8 +4369,8 @@ files = [
 [package.dependencies]
 numpy = [
     {version = ">=1.22.4", markers = "python_version < \"3.11\""},
-    {version = ">=1.23.2", markers = "python_version == \"3.11\""},
     {version = ">=1.26.0", markers = "python_version >= \"3.12\""},
+    {version = ">=1.23.2", markers = "python_version == \"3.11\""},
 ]
 python-dateutil = ">=2.8.2"
 pytz = ">=2020.1"
@@ -7100,11 +7089,6 @@ optional = false
 python-versions = ">=3.9"
 groups = ["main"]
 files = [
-    {file = "torchvision-0.21.0-1-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:5568c5a1ff1b2ec33127b629403adb530fab81378d9018ca4ed6508293f76e2b"},
-    {file = "torchvision-0.21.0-1-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:ff96666b94a55e802ea6796cabe788541719e6f4905fc59c380fed3517b6a64d"},
-    {file = "torchvision-0.21.0-1-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:ffa2a16499508fe6798323e455f312c7c55f2a88901c9a7c0fb1efa86cf7e327"},
-    {file = "torchvision-0.21.0-1-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:7e9e9afa150e40cd2a8f0701c43cb82a8d724f512896455c0918b987f94b84a4"},
-    {file = "torchvision-0.21.0-1-cp39-cp39-manylinux_2_28_aarch64.whl", hash = "sha256:669575b290ec27304569e188a960d12b907d5173f9cd65e86621d34c4e5b6c30"},
     {file = "torchvision-0.21.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:044ea420b8c6c3162a234cada8e2025b9076fa82504758cd11ec5d0f8cd9fa37"},
     {file = "torchvision-0.21.0-cp310-cp310-manylinux1_x86_64.whl", hash = "sha256:b0c0b264b89ab572888244f2e0bad5b7eaf5b696068fc0b93e96f7c3c198953f"},
     {file = "torchvision-0.21.0-cp310-cp310-manylinux_2_28_aarch64.whl", hash = "sha256:54815e0a56dde95cc6ec952577f67e0dc151eadd928e8d9f6a7f821d69a4a734"},
@@ -7996,4 +7980,4 @@ hyperscalers = ["azure-ai-formrecognizer", "azure-common", "azure-core", "boto3"
 [metadata]
 lock-version = "2.1"
 python-versions = "^3.10"
-content-hash = "6e4e1c7151fc551b4f3f2bd94f3c6906dd554384484da38b0e5bcc5603c59fdd"
+content-hash = "ad48608fca439c925fd79021a7323b74448f506595a7c79f652a07e9538dbd13"
diff --git a/pyproject.toml b/pyproject.toml
index 60afaa23..fc672ad8 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -25,7 +25,7 @@ packages = [{include = "docling_eval"}]
 # actual dependencies:
 ######################
 python = "^3.10"
-docling-core = "^2.27.0"
+docling-core = "^2.28.0"
 pydantic = "^2.0.0"
 lxml = "^5.3.0"
 datasets = "^3.2.0"