diff --git a/docling_eval/cli/main.py b/docling_eval/cli/main.py
index e1b566f7..6fcc56c1 100644
--- a/docling_eval/cli/main.py
+++ b/docling_eval/cli/main.py
@@ -31,6 +31,9 @@
 )
 from docling_eval.dataset_builders.doclaynet_v1_builder import DocLayNetV1DatasetBuilder
 from docling_eval.dataset_builders.doclaynet_v2_builder import DocLayNetV2DatasetBuilder
+from docling_eval.dataset_builders.doclingdpbench_builder import (
+    DoclingDPBenchDatasetBuilder,
+)
 from docling_eval.dataset_builders.docvqa_builder import DocVQADatasetBuilder
 from docling_eval.dataset_builders.dpbench_builder import DPBenchDatasetBuilder
 from docling_eval.dataset_builders.file_dataset_builder import FileDatasetBuilder
@@ -65,6 +68,10 @@
     DatasetTableEvaluation,
     TableEvaluator,
 )
+from docling_eval.evaluators.timings_evaluator import (
+    DatasetTimingsEvaluation,
+    TimingsEvaluator,
+)
 from docling_eval.prediction_providers.docling_provider import DoclingPredictionProvider
 from docling_eval.prediction_providers.file_provider import FilePredictionProvider
 from docling_eval.prediction_providers.tableformer_provider import (
@@ -72,13 +79,16 @@
 )

 # Configure logging
-logging.getLogger("docling").setLevel(logging.WARNING)
-logging.getLogger("PIL").setLevel(logging.WARNING)
-logging.getLogger("transformers").setLevel(logging.WARNING)
-logging.getLogger("datasets").setLevel(logging.WARNING)
-logging.getLogger("filelock").setLevel(logging.WARNING)
-logging.getLogger("urllib3").setLevel(logging.WARNING)
-logging.getLogger("docling_ibm_models").setLevel(logging.WARNING)
+logging_level = logging.WARNING
+# logging_level = logging.DEBUG
+logging.getLogger("docling").setLevel(logging_level)
+logging.getLogger("PIL").setLevel(logging_level)
+logging.getLogger("transformers").setLevel(logging_level)
+logging.getLogger("datasets").setLevel(logging_level)
+logging.getLogger("filelock").setLevel(logging_level)
+logging.getLogger("urllib3").setLevel(logging_level)
+logging.getLogger("docling_ibm_models").setLevel(logging_level)
+logging.getLogger("matplotlib").setLevel(logging_level)

 _log = logging.getLogger(__name__)

@@ -156,6 +166,9 @@ def get_dataset_builder(
     if benchmark == BenchMarkNames.DPBENCH:
         return DPBenchDatasetBuilder(**common_params)  # type: ignore

+    elif benchmark == BenchMarkNames.DOCLING_DPBENCH:
+        return DoclingDPBenchDatasetBuilder(**common_params)  # type: ignore
+
     elif benchmark == BenchMarkNames.DOCLAYNETV1:
         return DocLayNetV1DatasetBuilder(**common_params)  # type: ignore

@@ -418,6 +431,16 @@ def evaluate(
     if modality == EvaluationModality.END2END:
         _log.error("END2END evaluation not supported.")

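+    # Aggregate the per-document and per-page conversion timings recorded by the prediction provider.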
") + elif modality == EvaluationModality.TIMINGS: + timings_evaluator = TimingsEvaluator() + evaluation = timings_evaluator( # type: ignore + idir, + split=split, + ) + + with open(save_fn, "w") as fd: + json.dump(evaluation.model_dump(), fd, indent=2, sort_keys=True) + elif modality == EvaluationModality.LAYOUT: layout_evaluator = LayoutEvaluator() evaluation = layout_evaluator( # type: ignore @@ -538,6 +561,31 @@ def visualize( if modality == EvaluationModality.END2END: _log.error("END2END visualization not supported") + elif modality == EvaluationModality.TIMINGS: + try: + with open(metrics_filename, "r") as fd: + timings_evaluation = DatasetTimingsEvaluation.model_validate_json( + fd.read() + ) + + log_and_save_stats( + odir, + benchmark, + modality, + "time_to_solution_per_doc", + timings_evaluation.timing_per_document_stats, + ) + + log_and_save_stats( + odir, + benchmark, + modality, + "time_to_solution_per_page", + timings_evaluation.timing_per_page_stats, + ) + except Exception as e: + _log.error(f"Error processing timings evaluation: {str(e)}") + elif modality == EvaluationModality.LAYOUT: try: with open(metrics_filename, "r") as fd: @@ -554,6 +602,30 @@ def visualize( layout_evaluation.map_stats, ) + log_and_save_stats( + odir, + benchmark, + modality, + "precision", + layout_evaluation.segmentation_precision_stats, + ) + + log_and_save_stats( + odir, + benchmark, + modality, + "recall", + layout_evaluation.segmentation_recall_stats, + ) + + log_and_save_stats( + odir, + benchmark, + modality, + "f1", + layout_evaluation.segmentation_f1_stats, + ) + # Append to layout statistics, the AP per classes data, headers = layout_evaluation.to_table() content = "\n\n\nAP[0.5:0.05:0.95] per class (reported as %):\n\n" @@ -724,6 +796,7 @@ def create_gt( end_index: Annotated[ int, typer.Option(help="End index (exclusive), -1 for all") ] = -1, + chunk_size: Annotated[int, typer.Option(help="chunk size")] = 80, ): """Create ground truth dataset only.""" gt_dir = output_dir / "gt_dataset" @@ -741,7 +814,7 @@ def create_gt( # Retrieve and save the dataset if dataset_builder.must_retrieve: dataset_builder.retrieve_input_dataset() - dataset_builder.save_to_disk(chunk_size=80) + dataset_builder.save_to_disk(chunk_size=chunk_size) _log.info(f"Ground truth dataset created at {gt_dir}") except ValueError as e: @@ -841,6 +914,7 @@ def create( end_index: Annotated[ int, typer.Option(help="End index (exclusive), -1 for all") ] = -1, + chunk_size: Annotated[int, typer.Option(help="chunk size")] = 80, prediction_provider: Annotated[ Optional[PredictionProviderType], typer.Option(help="Type of prediction provider to use"), @@ -861,6 +935,7 @@ def create( split=split, begin_index=begin_index, end_index=end_index, + chunk_size=chunk_size, ) # Then create evaluation if provider specified diff --git a/docling_eval/datamodels/dataset_record.py b/docling_eval/datamodels/dataset_record.py index 94c9d710..ed88bf7c 100644 --- a/docling_eval/datamodels/dataset_record.py +++ b/docling_eval/datamodels/dataset_record.py @@ -173,6 +173,7 @@ class DatasetRecordWithPrediction(DatasetRecord): ) original_prediction: Optional[str] = None prediction_format: PredictionFormats # some enum type + prediction_timings: Optional[Dict] = Field(alias="prediction_timings", default=None) predicted_page_images: List[PIL.Image.Image] = Field( alias="PredictionPageImages", default=[] @@ -201,6 +202,7 @@ def features(cls): cls.get_field_alias("mime_type"): Value("string"), cls.get_field_alias("modalities"): Sequence(Value("string")), 
cls.get_field_alias("prediction_format"): Value("string"), + cls.get_field_alias("prediction_timings"): Value("string"), } def as_record_dict(self): @@ -208,6 +210,7 @@ def as_record_dict(self): record.update( { self.get_field_alias("prediction_format"): self.prediction_format.value, + self.get_field_alias("prediction_timings"): self.prediction_timings, } ) diff --git a/docling_eval/datamodels/types.py b/docling_eval/datamodels/types.py index 120ee414..04a0fd88 100644 --- a/docling_eval/datamodels/types.py +++ b/docling_eval/datamodels/types.py @@ -47,12 +47,14 @@ class EvaluationModality(str, Enum): OCR = "ocr" KEY_VALUE = "key_value" QUESTION_ANSWERING = "question_answering" + TIMINGS = "timings" class BenchMarkNames(str, Enum): # End-to-End DPBENCH = "DPBench" + DOCLING_DPBENCH = "DoclingDPBench" OMNIDOCBENCH = "OmniDocBench" WORDSCAPE = "WordScape" diff --git a/docling_eval/dataset_builders/doclingdpbench_builder.py b/docling_eval/dataset_builders/doclingdpbench_builder.py new file mode 100644 index 00000000..6f2178bf --- /dev/null +++ b/docling_eval/dataset_builders/doclingdpbench_builder.py @@ -0,0 +1,103 @@ +import json +import logging +import os +from io import BytesIO +from pathlib import Path +from typing import Dict, Iterable, Set + +from datasets import load_dataset +from docling_core.types import DoclingDocument +from docling_core.types.io import DocumentStream +from PIL import Image as PILImage + +from docling_eval.datamodels.dataset_record import DatasetRecord +from docling_eval.dataset_builders.dataset_builder import ( + BaseEvaluationDatasetBuilder, + HFSource, +) +from docling_eval.utils.utils import get_binary, get_binhash + +# Get logger +_log = logging.getLogger(__name__) + + +class DoclingDPBenchDatasetBuilder(BaseEvaluationDatasetBuilder): + """ + DoclingDPBench dataset builder implementing the base dataset builder interface. + + This builder processes the DoclingDPBench dataset, which contains document + understanding benchmarks for various document types. + """ + + def __init__( + self, + target: Path, + split: str = "test", + begin_index: int = 0, + end_index: int = -1, + ): + """ + Initialize the DoclingDPBench dataset builder. + + Args: + target: Path where processed dataset will be saved + split: Dataset split to use + begin_index: Start index for processing (inclusive) + end_index: End index for processing (exclusive), -1 means process all + """ + super().__init__( + name="DoclingDPBench", + dataset_source=HFSource(repo_id="ds4sd/docling-dpbench"), + target=target, + split=split, + begin_index=begin_index, + end_index=end_index, + ) + + self.must_retrieve = True + + def iterate(self) -> Iterable[DatasetRecord]: + """ + Iterate through the dataset and yield DatasetRecord objects. + + Yields: + DatasetRecord objects + """ + if not self.retrieved and self.must_retrieve: + raise RuntimeError( + "You must first retrieve the source dataset. Call retrieve_input_dataset()." + ) + + assert self.dataset_local_path is not None + _log.info(f"dataset_local_path: {self.dataset_local_path}") + + # Login using e.g. 
+        ds = load_dataset("ds4sd/docling-dpbench")
+
+        for idx, _ in enumerate(ds["test"]):
+            doc_hash = str(get_binhash(_["BinaryDocument"]))
+            doc = DoclingDocument.model_validate_json(_["GroundTruthDocument"])
+
+            page_images = [
+                PILImage.open(BytesIO(__["bytes"])) for __ in _["GroundTruthPageImages"]
+            ]
+            pictures = [
+                PILImage.open(BytesIO(__["bytes"])) for __ in _["GroundTruthPictures"]
+            ]
+
+            pdf_stream = DocumentStream(
+                name=f"ds4sd/docling-dpbench/{idx}", stream=BytesIO(_["BinaryDocument"])
+            )
+
+            # Create dataset record
+            record = DatasetRecord(
+                doc_id=str(_["document_id"]),
+                doc_hash=doc_hash,
+                ground_truth_doc=doc,
+                ground_truth_pictures=pictures,
+                ground_truth_page_images=page_images,
+                original=pdf_stream,
+                mime_type=_["mimetype"],
+            )
+
+            yield record
diff --git a/docling_eval/dataset_builders/file_dataset_builder.py b/docling_eval/dataset_builders/file_dataset_builder.py
index 111bd19f..6b2e9d59 100644
--- a/docling_eval/dataset_builders/file_dataset_builder.py
+++ b/docling_eval/dataset_builders/file_dataset_builder.py
@@ -108,7 +108,7 @@ def iterate(self) -> Iterable[DatasetRecord]:
             # Create the ground truth Document
             true_doc = DoclingDocument(name=f"{filename}")
             if mime_type == "application/pdf":
-                _log.info(f"add_pages_to_true_doc: {filename}")
+                _log.debug(f"add_pages_to_true_doc: {filename}")
                 true_doc, _ = add_pages_to_true_doc(
                     pdf_path=filename, true_doc=true_doc, image_scale=2.0
                 )
@@ -127,7 +127,7 @@ def iterate(self) -> Iterable[DatasetRecord]:
                     image=image_ref,
                 )

-                _log.info(f"add_pages_to_true_doc: {filename}")
+                _log.debug(f"add_pages_to_true_doc: {filename}")
                 true_doc.pages[1] = page_item
             else:
                 raise ValueError(
diff --git a/docling_eval/evaluators/layout_evaluator.py b/docling_eval/evaluators/layout_evaluator.py
index 63646762..31ee0a5e 100644
--- a/docling_eval/evaluators/layout_evaluator.py
+++ b/docling_eval/evaluators/layout_evaluator.py
@@ -59,6 +59,10 @@ class ImageLayoutEvaluation(UnitEvaluation):
     avg_weighted_label_matched_iou_90: float
     avg_weighted_label_matched_iou_95: float

+    segmentation_precision: float
+    segmentation_recall: float
+    segmentation_f1: float
+

 class DatasetLayoutEvaluation(DatasetEvaluation):
     true_labels: Dict[str, int]
@@ -78,6 +82,10 @@ class DatasetLayoutEvaluation(DatasetEvaluation):
     weighted_map_90_stats: DatasetStatistics
     weighted_map_95_stats: DatasetStatistics

+    segmentation_precision_stats: DatasetStatistics
+    segmentation_recall_stats: DatasetStatistics
+    segmentation_f1_stats: DatasetStatistics
+
     def to_table(self) -> Tuple[List[List[str]], List[str]]:
         headers = ["label", "Class mAP[0.5:0.95]"]

@@ -137,19 +145,28 @@ def __call__(
         # Load the dataset
         split_path = str(ds_path / split / "*.parquet")
         split_files = glob.glob(split_path)
-        logging.info("Files: %s", split_files)
+        logging.info("#-files: %s", len(split_files))
         ds = load_dataset("parquet", data_files={split: split_files})
         logging.info("Overview of dataset: %s", ds)

         # Select the split
         ds_selection: Dataset = ds[split]

-        true_labels, pred_labels, intersection_labels = self._find_intersecting_labels(
-            ds_selection
+        true_labels, pred_labels, intersection_labels, union_labels = (
+            self._find_intersecting_labels(ds_selection)
         )
-        intersection_labels_str = "\n" + "\n".join(sorted(intersection_labels))
+        true_labels_str = ", ".join(sorted(true_labels))
+        logging.info(f"True labels: {true_labels_str}")
+
+        pred_labels_str = ", ".join(sorted(pred_labels))
+        logging.info(f"Pred labels: {pred_labels_str}")
+
+        intersection_labels_str = ", ".join(sorted(intersection_labels))
         logging.info(f"Intersection labels: {intersection_labels_str}")

+        union_labels_str = ", ".join(sorted(union_labels))
+        logging.info(f"Union labels: {union_labels_str}")
+
         doc_ids = []
         ground_truths = []
         predictions = []
@@ -187,6 +204,9 @@ def __call__(
                 filter_labels=intersection_labels,
             )

+            # logging.info(f"gts: {gts}")
+            # logging.info(f"preds: {preds}")
+
             if len(gts) > 0:
                 for i in range(len(gts)):
                     doc_ids.append(data[BenchMarkColumns.DOC_ID] + f"-page-{i}")
@@ -258,8 +278,19 @@ def __call__(
         for i, (doc_id, pred, gt) in enumerate(
             zip(doc_ids, predictions, ground_truths)
         ):
+            # logging.info(f"gt: {gt}")
+            # logging.info(f"pred: {pred}")
+
+            precision, recall, f1 = self._compute_area_level_metrics_for_tensors(
+                gt_boxes=gt["boxes"],
+                pred_boxes=pred["boxes"],
+                page_width=100,
+                page_height=100,
+                mask_width=512,
+                mask_height=512,
+            )
+
             # Reset the metric for the next image
-            # metric.reset()
             metric = MeanAveragePrecision(iou_type="bbox", class_metrics=True)

             # Update with single image
@@ -293,6 +324,10 @@ def __call__(
             weighted_map_90_values.append(average_iou_90)
             weighted_map_95_values.append(average_iou_95)

+            logging.info(
+                f"doc: {doc_id}\tprecision: {precision:.2f}, recall: {recall:.2f}, f1: {f1:.2f}, map_50: {map_50:.2f}"
+            )
+
             image_evaluation = ImageLayoutEvaluation(
                 name=doc_id,
                 value=average_iou_50,
@@ -303,6 +338,9 @@ def __call__(
                 avg_weighted_label_matched_iou_75=average_iou_75,
                 avg_weighted_label_matched_iou_90=average_iou_90,
                 avg_weighted_label_matched_iou_95=average_iou_95,
+                segmentation_precision=precision,
+                segmentation_recall=recall,
+                segmentation_f1=f1,
             )
             evaluations_per_image.append(image_evaluation)
             if self._intermediate_evaluations_path:
@@ -326,6 +364,15 @@ def __call__(
             weighted_map_75_stats=compute_stats(weighted_map_75_values),
             weighted_map_90_stats=compute_stats(weighted_map_90_values),
             weighted_map_95_stats=compute_stats(weighted_map_95_values),
+            segmentation_precision_stats=compute_stats(
+                [_.segmentation_precision for _ in evaluations_per_image]
+            ),
+            segmentation_recall_stats=compute_stats(
+                [_.segmentation_recall for _ in evaluations_per_image]
+            ),
+            segmentation_f1_stats=compute_stats(
+                [_.segmentation_f1 for _ in evaluations_per_image]
+            ),
             true_labels=true_labels,
             pred_labels=pred_labels,
             intersecting_labels=[_.value for _ in intersection_labels],
@@ -449,7 +496,7 @@ def _compute_average_iou_with_labels_across_iou(
     def _find_intersecting_labels(
         self,
         ds: Dataset,
-    ) -> tuple[dict[str, int], dict[str, int], list[DocItemLabel]]:
+    ) -> tuple[dict[str, int], dict[str, int], list[DocItemLabel], list[DocItemLabel]]:
         r"""
         Compute counters per labels for the groundtruth, prediciton and their intersections

@@ -502,11 +549,18 @@ def _find_intersecting_labels(
         """
         intersection_labels: List[DocItemLabel] = []
+        union_labels: List[DocItemLabel] = []
         for label, count in true_labels.items():
+            union_labels.append(DocItemLabel(label))
+
             if label in pred_labels:
                 intersection_labels.append(DocItemLabel(label))

-        return true_labels, pred_labels, intersection_labels
+        for label, count in pred_labels.items():
+            if label not in true_labels:
+                union_labels.append(DocItemLabel(label))
+
+        return true_labels, pred_labels, intersection_labels, union_labels

     def _extract_layout_data(
         self,
@@ -572,13 +626,10 @@ def _extract_layout_data(
             for item in items:
                 for prov in item.prov:
                     bbox = prov.bbox.to_top_left_origin(page_height=page_height)
-                    # true_tl_bboxes.append(copy.deepcopy(bbox))

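+                    # Boxes are normalized to the page size and scaled to a 0-100 range before evaluation.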
                     bbox = bbox.normalized(page_size)
                     bbox = bbox.scaled(100.0)

-                    # logging.info(f"ground-truth {page_no}: {page_width, page_height} -> {item.label}, {bbox.coord_origin}: [{bbox.l}, {bbox.t}, {bbox.r}, {bbox.b}]")
-
                     bboxes.append([bbox.l, bbox.t, bbox.r, bbox.b])
                     labels.append(filter_labels.index(self.label_mapping[item.label]))  # type: ignore

@@ -635,3 +686,94 @@
         # print(pred_tl_bboxes_str)

         return ground_truths, predictions
+
+    def _compute_area_level_metrics_for_tensors(
+        self,
+        gt_boxes: torch.Tensor,
+        pred_boxes: torch.Tensor,
+        page_width: int,
+        page_height: int,
+        mask_width: int = 512,
+        mask_height: int = 512,
+    ) -> Tuple[float, float, float]:
+        """
+        Compute area-level precision, recall, and F1 score for tensor format boxes.
+        Handles overlapping boxes by using binary masks at the specified resolution.
+
+        Args:
+            gt_boxes: Ground truth boxes as tensor of shape (N, 4) with [x1, y1, x2, y2] format
+            pred_boxes: Predicted boxes as tensor of shape (M, 4) with [x1, y1, x2, y2] format
+            page_width: Width of the original page
+            page_height: Height of the original page
+            mask_width: Width of the mask to use for computation (default: 512)
+            mask_height: Height of the mask to use for computation (default: 512)
+
+        Returns:
+            Tuple of (precision, recall, f1) scores
+        """
+        if gt_boxes.shape[0] == 0:
+            precision = 1.0 if pred_boxes.shape[0] == 0 else 0.0
+            recall = 1.0
+            f1 = 1.0 if pred_boxes.shape[0] == 0 else 0.0
+            return precision, recall, f1
+
+        if pred_boxes.shape[0] == 0:
+            precision = 1.0
+            recall = 0.0
+            f1 = 0.0
+            return precision, recall, f1
+
+        # Calculate scaling factors (ensure float division)
+        x_scale = float(mask_width) / float(page_width)
+        y_scale = float(mask_height) / float(page_height)
+
+        # Create empty masks
+        gt_mask = torch.zeros((mask_height, mask_width), dtype=torch.bool, device="cpu")
+        pred_mask = torch.zeros(
+            (mask_height, mask_width), dtype=torch.bool, device="cpu"
+        )
+
+        # Fill ground truth mask
+        for i in range(gt_boxes.shape[0]):
+            x1, y1, x2, y2 = gt_boxes[i].tolist()
+
+            # Scale coordinates to mask space
+            x1, y1 = max(0, int(x1 * x_scale)), max(0, int(y1 * y_scale))
+            x2, y2 = min(mask_width, int(x2 * x_scale)), min(
+                mask_height, int(y2 * y_scale)
+            )
+
+            if x2 > x1 and y2 > y1:
+                gt_mask[y1:y2, x1:x2] = True
+
+        # Fill prediction mask
+        for i in range(pred_boxes.shape[0]):
+            x1, y1, x2, y2 = pred_boxes[i].tolist()
+
+            # Scale coordinates to mask space
+            x1, y1 = max(0, int(x1 * x_scale)), max(0, int(y1 * y_scale))
+            x2, y2 = min(mask_width, int(x2 * x_scale)), min(
+                mask_height, int(y2 * y_scale)
+            )
+
+            if x2 > x1 and y2 > y1:
+                pred_mask[y1:y2, x1:x2] = True
+
+        # Calculate areas (accounting for overlaps)
+        total_gt_area = torch.sum(gt_mask).item()
+        total_pred_area = torch.sum(pred_mask).item()
+
+        # Calculate intersection (logical AND of masks)
+        intersection_mask = torch.logical_and(gt_mask, pred_mask)
+        total_intersection = torch.sum(intersection_mask).item()
+
+        # Calculate metrics
+        precision = total_intersection / total_pred_area if total_pred_area > 0 else 0.0
+        recall = total_intersection / total_gt_area if total_gt_area > 0 else 0.0
+
+        # Calculate F1 score
+        f1 = 0.0
+        if precision + recall > 0:
+            f1 = 2 * (precision * recall) / (precision + recall)
+
+        return precision, recall, f1
diff --git a/docling_eval/evaluators/stats.py b/docling_eval/evaluators/stats.py
index ecd898a4..218bbfb6 100644
--- a/docling_eval/evaluators/stats.py
+++ b/docling_eval/evaluators/stats.py
@@ -74,16 +74,22 @@ def save_histogram(self, figname: Path, name: str = ""):
         plt.savefig(figname)


-def compute_stats(values: List[float]) -> DatasetStatistics:
+def compute_stats(
+    values: List[float], max_value_is_one: bool = True, nr_bins: int = 20
+) -> DatasetStatistics:
     total: int = len(values)
     mean: float = statistics.mean(values) if len(values) > 0 else -1
     median: float = statistics.median(values) if len(values) > 0 else -1
-    std: float = statistics.stdev(values) if len(values) > 0 else -1
+    std: float = statistics.stdev(values) if len(values) > 1 else 0.0
     logging.info(f"total: {total}, mean: {mean}, median: {median}, std: {std}")

-    # Compute the histogram with 20 bins between 0 and 1
-    hist, bins = np.histogram(values, bins=20, range=(0, 1))
+    max_value = 1.0
+    if not max_value_is_one and len(values) > 0:
+        max_value = max(values)
+
+    # Compute the histogram
+    hist, bins = np.histogram(values, bins=nr_bins, range=(0, max_value))
     logging.info(f"#-hist: {len(hist)}, #-bins: {len(bins)}")

     return DatasetStatistics(
diff --git a/docling_eval/evaluators/timings_evaluator.py b/docling_eval/evaluators/timings_evaluator.py
new file mode 100644
index 00000000..56192564
--- /dev/null
+++ b/docling_eval/evaluators/timings_evaluator.py
@@ -0,0 +1,130 @@
+import glob
+import logging
+from pathlib import Path
+from typing import Dict, List, Optional, Tuple
+
+from datasets import Dataset, load_dataset
+from tqdm import tqdm
+
+from docling_eval.datamodels.dataset_record import DatasetRecordWithPrediction
+from docling_eval.datamodels.types import BenchMarkColumns, PredictionFormats
+from docling_eval.evaluators.base_evaluator import (
+    BaseEvaluator,
+    DatasetEvaluation,
+    EvaluationRejectionType,
+    UnitEvaluation,
+)
+from docling_eval.evaluators.stats import DatasetStatistics, compute_stats
+
+_log = logging.getLogger(__name__)
+
+
+class DatasetTimingsEvaluation(DatasetEvaluation):
+    """Dataset timing evaluation."""
+
+    timing_per_document_stats: DatasetStatistics
+    timing_per_page_stats: DatasetStatistics
+
+
+class TimingsEvaluator(BaseEvaluator):
+    """Timings evaluator."""
+
+    def __init__(
+        self,
+        intermediate_evaluations_path: Optional[Path] = None,
+        prediction_sources: List[PredictionFormats] = [],
+    ):
+        supported_prediction_formats: List[PredictionFormats] = [
+            PredictionFormats.DOCLING_DOCUMENT,
+        ]
+
+        if not prediction_sources:
+            prediction_sources = supported_prediction_formats
+        super().__init__(
+            intermediate_evaluations_path=intermediate_evaluations_path,
+            prediction_sources=prediction_sources,
+            supported_prediction_formats=supported_prediction_formats,
+        )
+
+    def __call__(
+        self,
+        ds_path: Path,
+        split: str = "test",
+    ) -> DatasetTimingsEvaluation:
+        logging.info("Loading the split '%s' from: '%s'", split, ds_path)
+
+        rejected_samples: Dict[EvaluationRejectionType, int] = {
+            EvaluationRejectionType.INVALID_CONVERSION_STATUS: 0,
+            EvaluationRejectionType.MISSING_PREDICTION: 0,
+            EvaluationRejectionType.MISMATHCED_DOCUMENT: 0,
+        }
+
+        # Load the dataset
+        split_path = str(ds_path / split / "*.parquet")
+        split_files = glob.glob(split_path)
+        logging.info("#-files: %s", len(split_files))
+        ds = load_dataset("parquet", data_files={split: split_files})
+        logging.info("Overview of dataset: %s", ds)
+
+        # Select the split
+        ds_selection: Dataset = ds[split]
+
+        timings = []
+        for i, data in tqdm(
+            enumerate(ds_selection),
+            desc="Timings evaluations",
+            ncols=120,
+            total=len(ds_selection),
+        ):
+            data_record = DatasetRecordWithPrediction.model_validate(data)
+
+            doc_id = data_record.doc_id
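+            # Records whose conversion failed carry no usable timings and are skipped.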
+            if data_record.status not in self._accepted_status:
+                _log.error(
+                    "Skipping record without successful conversion status: %s", doc_id
+                )
+                rejected_samples[EvaluationRejectionType.INVALID_CONVERSION_STATUS] += 1
+                continue
+
+            # print(data_record.prediction_timings)
+            timings.append(data_record.prediction_timings)
+
+        if rejected_samples[EvaluationRejectionType.MISMATHCED_DOCUMENT] > 0:
+            logging.error(
+                "Total mismatched/skipped documents: %s over %s",
+                rejected_samples[EvaluationRejectionType.MISMATHCED_DOCUMENT],
+                len(ds_selection),
+            )
+
+        time_per_doc = []
+        time_per_page = []
+
+        for timing in timings:
+
+            if timing is not None:
+                for key, val in timing.items():
+                    if key == "pipeline_total":
+                        time_per_doc.extend(val)
+
+                    if key == "layout":
+                        _time_per_page = [0.0 for v in val]
+                        for k2, v2 in timing.items():
+                            if len(v2) == len(_time_per_page):
+                                for i, v in enumerate(v2):
+                                    _time_per_page[i] += v
+
+                        time_per_page.extend(_time_per_page)
+
+        dataset_timings_evaluation = DatasetTimingsEvaluation(
+            timing_per_document_stats=compute_stats(
+                time_per_doc,
+                max_value_is_one=False,
+                nr_bins=32,
+            ),
+            timing_per_page_stats=compute_stats(
+                time_per_page,
+                max_value_is_one=False,
+                nr_bins=32,
+            ),
+        )
+        return dataset_timings_evaluation
diff --git a/docling_eval/prediction_providers/base_prediction_provider.py b/docling_eval/prediction_providers/base_prediction_provider.py
index d280ed98..a642593d 100644
--- a/docling_eval/prediction_providers/base_prediction_provider.py
+++ b/docling_eval/prediction_providers/base_prediction_provider.py
@@ -8,6 +8,7 @@
 from datasets import load_dataset
 from docling.datamodel.base_models import ConversionStatus
+from docling.utils.profiling import ProfilingItem
 from docling.utils.utils import chunkify
 from docling_core.types.doc import DocItemLabel
 from docling_core.types.doc.document import DoclingDocument
@@ -31,7 +32,6 @@
 )
 from docling_eval.visualisation.visualisations import save_comparison_html_with_clusters

-# Get logger
 _log = logging.getLogger(__name__)

 # Default HTML export labels for visualization
@@ -186,6 +186,7 @@ def create_dataset_record_with_prediction(
         record: DatasetRecord,
         predicted_doc: Optional[DoclingDocument] = None,
         original_prediction: Optional[str] = None,
+        timings: Optional[dict] = None,
     ) -> DatasetRecordWithPrediction:
         """
         Create a dataset record with prediction from an input record.
@@ -215,9 +216,36 @@ def create_dataset_record_with_prediction(
             "predicted_pictures": pred_pictures,
             "original_prediction": original_prediction,
             "prediction_format": self.prediction_format,
+            "prediction_timings": self._prediction_timings(timings),
             "predictor_info": self.info(),
         }
-        return DatasetRecordWithPrediction.model_validate(data)
+        record = DatasetRecordWithPrediction.model_validate(data)
+
+        return record
+
+    def _prediction_timings(self, timings: Optional[dict]) -> Optional[dict]:
+        """Get prediction timings."""
+
+        if isinstance(timings, dict):
+            result = {}
+            for key, val in timings.items():
+                if isinstance(val, ProfilingItem):
+                    result[key] = val.times
+
+            if len(result) == 0:  # datasets does not like empty dicts
+                _log.warning(f"empty timings: {timings}")
+                return None
+
+            # import json
+            # print(json.dumps(result, indent=2))
+
+            return result
+
+        elif timings is None:
+            return None
+        else:
+            _log.warning(f"unknown type of timings: {timings}")
+            return None

     def add_prediction(self, record: DatasetRecord) -> DatasetRecordWithPrediction:
         """
diff --git a/docling_eval/prediction_providers/docling_provider.py b/docling_eval/prediction_providers/docling_provider.py
index b86b619b..2a4a2b8c 100644
--- a/docling_eval/prediction_providers/docling_provider.py
+++ b/docling_eval/prediction_providers/docling_provider.py
@@ -1,8 +1,10 @@
 import copy
+import logging
 import platform
 from typing import Dict, List, Optional, Set

 from docling.datamodel.base_models import InputFormat
+from docling.datamodel.settings import settings
 from docling.document_converter import DocumentConverter, FormatOption
 from docling_core.types.doc import DocItemLabel
 from pydantic import TypeAdapter
@@ -21,6 +23,8 @@
 )
 from docling_eval.utils.utils import docling_version, get_package_version

+_log = logging.getLogger(__name__)
+

 class DoclingPredictionProvider(BasePredictionProvider):
     """
@@ -47,6 +51,7 @@ def __init__(
         ignore_missing_predictions: bool = True,
         true_labels: Optional[Set[DocItemLabel]] = None,
         pred_labels: Optional[Set[DocItemLabel]] = None,
+        profile_pipeline_timings: bool = True,
     ):
         """
         Initialize the Docling prediction provider.
@@ -65,6 +70,11 @@ def __init__(
             true_labels=true_labels,
             pred_labels=pred_labels,
         )
+
+        # Enable profiling to measure the time spent in the conversion pipeline
+        settings.debug.profile_pipeline_timings = profile_pipeline_timings
+        _log.info(f"profile_pipeline_timings: {profile_pipeline_timings}")
+
         self.doc_converter = DocumentConverter(format_options=format_options)

     @property
@@ -95,9 +105,7 @@ def predict(self, record: DatasetRecord) -> DatasetRecordWithPrediction:

         # Create prediction record
         pred_record = self.create_dataset_record_with_prediction(
-            record,
-            res.document,
-            None,
+            record, res.document, None, res.timings
         )
         pred_record.status = res.status

diff --git a/docling_eval/visualisation/constants.py b/docling_eval/visualisation/constants.py
index 823c258a..59c12c5a 100644
--- a/docling_eval/visualisation/constants.py
+++ b/docling_eval/visualisation/constants.py
@@ -109,6 +109,99 @@
 """

+HTML_DEFAULT_HEAD_FOR_COMP_v2: str = r"""
+ + +