From 4369d20f232537d23468d2003f8ee895815e00f0 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Wed, 16 Oct 2024 08:35:08 +0200 Subject: [PATCH 1/5] feat: adapt hierarchical chunker to v2 DoclingDocument [skip-ci] Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- docling_core/transforms/chunker/__init__.py | 7 +- docling_core/transforms/chunker/base.py | 54 +- .../chunker/hierarchical_chunker.py | 431 +- .../transforms/id_generator/__init__.py | 3 - docling_core/transforms/id_generator/base.py | 2 +- .../id_generator/doc_hash_id_generator.py | 27 - .../transforms/id_generator/uuid_generator.py | 2 +- .../transforms/metadata_extractor/__init__.py | 3 - .../transforms/metadata_extractor/base.py | 2 +- .../simple_metadata_extractor.py | 59 - docling_core/types/doc/document.py | 3 +- test/data/chunker/0_inp_dl_doc.json | 17638 +++++++++++++++- test/data/chunker/0_out_chunks.json | 3858 ++++ ...ut_chunks_heading_in_meta_with_extras.json | 51 - ...ut_chunks_heading_in_meta_wout_extras.json | 23 - ...ut_chunks_heading_in_text_with_extras.json | 59 - ...ut_chunks_heading_in_text_wout_extras.json | 24 - test/data/chunker/1_out_chunks.json | 4048 ++++ test/test_chunk.py | 16 - test/test_hierarchical_chunker.py | 47 +- 20 files changed, 25275 insertions(+), 1082 deletions(-) delete mode 100644 docling_core/transforms/id_generator/doc_hash_id_generator.py delete mode 100644 docling_core/transforms/metadata_extractor/simple_metadata_extractor.py create mode 100644 test/data/chunker/0_out_chunks.json delete mode 100644 test/data/chunker/0_out_chunks_heading_in_meta_with_extras.json delete mode 100644 test/data/chunker/0_out_chunks_heading_in_meta_wout_extras.json delete mode 100644 test/data/chunker/0_out_chunks_heading_in_text_with_extras.json delete mode 100644 test/data/chunker/0_out_chunks_heading_in_text_wout_extras.json create mode 100644 test/data/chunker/1_out_chunks.json delete mode 100644 test/test_chunk.py diff --git a/docling_core/transforms/chunker/__init__.py b/docling_core/transforms/chunker/__init__.py index f476d0ce..8b5585ba 100644 --- a/docling_core/transforms/chunker/__init__.py +++ b/docling_core/transforms/chunker/__init__.py @@ -5,11 +5,8 @@ """Define the chunker types.""" -from docling_core.transforms.chunker.base import ( # noqa - BaseChunker, - Chunk, - ChunkWithMetadata, -) +from docling_core.transforms.chunker.base import BaseChunk, BaseChunker # noqa from docling_core.transforms.chunker.hierarchical_chunker import ( # noqa + Chunk, HierarchicalChunker, ) diff --git a/docling_core/transforms/chunker/base.py b/docling_core/transforms/chunker/base.py index 88ac9ff3..cf39518c 100644 --- a/docling_core/transforms/chunker/base.py +++ b/docling_core/transforms/chunker/base.py @@ -4,71 +4,35 @@ # """Define base classes for chunking.""" -import re from abc import ABC, abstractmethod -from typing import Final, Iterator, Optional +from typing import Any, Iterator -from pydantic import BaseModel, Field, field_validator +from pydantic import BaseModel -from docling_core.types import BoundingBox, Document -from docling_core.types.base import _JSON_POINTER_REGEX +from docling_core.types.doc import DoclingDocument as DLDocument -# (subset of) JSONPath format, e.g. "$.main-text[84]" (for migration purposes) -_DEPRECATED_JSON_PATH_PATTERN: Final = re.compile(r"^\$\.([\w-]+)\[(\d+)\]$") +class BaseChunk(BaseModel): + """Data model for base chunk.""" -def _create_path(pos: int, path_prefix: str = "main-text") -> str: - return f"#/{path_prefix}/{pos}" - - -class Chunk(BaseModel): - """Data model for Chunk.""" - - path: str = Field(pattern=_JSON_POINTER_REGEX) text: str - heading: Optional[str] = None - - @field_validator("path", mode="before") - @classmethod - def _json_pointer_from_json_path(cls, path: str): - if (match := _DEPRECATED_JSON_PATH_PATTERN.match(path)) is not None: - groups = match.groups() - if len(groups) == 2 and groups[0] is not None and groups[1] is not None: - return _create_path( - pos=int(groups[1]), - path_prefix=groups[0], - ) - return path - - -class ChunkWithMetadata(Chunk): - """Data model for Chunk including metadata.""" - - page: Optional[int] = None - bbox: Optional[BoundingBox] = None + meta: Any = None class BaseChunker(BaseModel, ABC): """Base class for Chunker.""" @abstractmethod - def chunk(self, dl_doc: Document, **kwargs) -> Iterator[Chunk]: + def chunk(self, dl_doc: DLDocument, **kwargs) -> Iterator[BaseChunk]: """Chunk the provided document. Args: - dl_doc (Document): document to chunk + dl_doc (DLDocument): document to chunk Raises: NotImplementedError: in this abstract implementation Yields: - Iterator[Chunk]: iterator over extracted chunks + Iterator[BaseChunk]: iterator over extracted chunks """ raise NotImplementedError() - - @classmethod - def _create_path(cls, pos: int, path_prefix: str = "main-text") -> str: - return _create_path( - pos=pos, - path_prefix=path_prefix, - ) diff --git a/docling_core/transforms/chunker/hierarchical_chunker.py b/docling_core/transforms/chunker/hierarchical_chunker.py index 3c0ec789..d9315f47 100644 --- a/docling_core/transforms/chunker/hierarchical_chunker.py +++ b/docling_core/transforms/chunker/hierarchical_chunker.py @@ -8,347 +8,152 @@ from __future__ import annotations import logging -from enum import Enum -from typing import Any, Iterator, Optional, Union +from typing import Any, ClassVar, Iterator, Optional -import pandas as pd -from pydantic import BaseModel, Field, PositiveInt +from pydantic import BaseModel, Field, conlist -from docling_core.transforms.chunker import BaseChunker, Chunk, ChunkWithMetadata -from docling_core.types import BaseText -from docling_core.types import Document as DLDocument -from docling_core.types import Ref, Table +from docling_core.transforms.chunker import BaseChunker +from docling_core.transforms.chunker.base import BaseChunk +from docling_core.types.doc import DoclingDocument as DLDocument +from docling_core.types.doc.document import ( + DocItem, + LevelNumber, + ListItem, + SectionHeaderItem, + TextItem, +) +from docling_core.types.doc.labels import DocItemLabel + +_KEY_PATHS = "paths" +_KEY_PROVS = "provs" +_KEY_HEADINGS = "headings" + +_KEY_DOC_ITEMS = "doc_items" _logger = logging.getLogger(__name__) -class HierarchicalChunker(BaseChunker): - """Chunker implementation leveraging the document layout.""" +class ChunkMeta(BaseModel): + """Data model for specific chunk metadata.""" - heading_as_metadata: bool = Field( - default=False, - description="Whether heading should be in metadata (instead of text)", + # TODO align paths typewith _JSON_POINTER_REGEX + doc_items: conlist(DocItem, min_length=1) = Field( # type: ignore + default=None, + alias=_KEY_DOC_ITEMS, ) - include_metadata: bool = Field( - default=True, - description="Whether to include extras in the metadata", + headings: Optional[conlist(str, min_length=1)] = Field( # type: ignore + default=None, + alias=_KEY_HEADINGS, ) - min_chunk_len: PositiveInt = Field( - default=64, description="Minimum chunk text length to consider (in chars)" - ) - - class _NodeType(str, Enum): - PARAGRAPH = "paragraph" - SUBTITLE_LEVEL_1 = "subtitle-level-1" - TABLE = "table" - CAPTION = "caption" - - class _NodeName(str, Enum): - TITLE = "title" - REFERENCE = "reference" - LIST_ITEM = "list-item" - SUBTITLE_LEVEL_1 = "subtitle-level-1" - - _allowed_types: list[str] = [ - _NodeType.PARAGRAPH, - _NodeType.SUBTITLE_LEVEL_1, - _NodeType.TABLE, - _NodeType.CAPTION, - ] - _disallowed_names_by_type: dict[str, list[str]] = { - _NodeType.PARAGRAPH: [ - _NodeName.REFERENCE, - ], - } - - @classmethod - def _norm(cls, text: Optional[str]) -> Optional[str]: - return text.lower() if text is not None else None - - @classmethod - def _convert_table_to_dataframe(cls, table: Table) -> Optional[pd.DataFrame]: - if table.data: - table_content = [[cell.text for cell in row] for row in table.data] - return pd.DataFrame(table_content) - else: - return None - - @classmethod - def _triplet_serialize(cls, table) -> Optional[str]: - output_text: Optional[str] = None - table_df = cls._convert_table_to_dataframe(table) - if table_df is not None and table_df.shape[0] > 1 and table_df.shape[1] > 1: - rows = [item.strip() for item in table_df.iloc[:, 0].to_list()] - cols = [item.strip() for item in table_df.iloc[0, :].to_list()] - nrows = table_df.shape[0] - ncols = table_df.shape[1] - texts = [ - f"{rows[i]}, {cols[j]} = {str(table_df.iloc[i, j]).strip()}" - for i in range(1, nrows) - for j in range(1, ncols) - ] - output_text = ". ".join(texts) - - return output_text - - class _MainTextItemNode(BaseModel): - parent: Optional[int] = None - children: list[int] = [] - - class _TitleInfo(BaseModel): - text: str - path_in_doc: str - - class _GlobalContext(BaseModel): - title: Optional[_HC._TitleInfo] = None - - class _DocContext(BaseModel): - dmap: dict[int, _HC._MainTextItemNode] # main text element context - glob: _HC._GlobalContext # global context - - @classmethod - def from_doc(cls, doc: DLDocument) -> _HC._DocContext: - dmap: dict[int, _HC._MainTextItemNode] = {} - glob: _HC._GlobalContext = _HC._GlobalContext() - if doc.description.title: - glob.title = _HC._TitleInfo( - text=doc.description.title, - path_in_doc="description.title", - ) - - parent = None - if doc.main_text: - idx = 0 - while idx < len(doc.main_text): - item = doc.main_text[idx] - if ( - not glob.title - and isinstance(item, BaseText) - and _HC._norm(item.name) == _HC._NodeName.TITLE - ): - glob.title = _HC._TitleInfo( - text=item.text, - path_in_doc=_HC._create_path(idx), - ) - - # start of a subtitle-level-1 parent - if ( - isinstance(item, BaseText) - and _HC._norm(item.obj_type) == _HC._NodeType.SUBTITLE_LEVEL_1 - ): - dmap[idx] = _HC._MainTextItemNode(parent=None) - parent = idx - if not glob.title: - glob.title = _HC._TitleInfo( - text=item.text, - path_in_doc=_HC._create_path(idx), - ) - - # start of a list parent - elif ( - isinstance(item, BaseText) - and _HC._norm(item.name) != _HC._NodeName.LIST_ITEM - and idx + 1 < len(doc.main_text) - and _HC._norm(doc.main_text[idx + 1].name) - == _HC._NodeName.LIST_ITEM - ): - if parent is not None: - dmap[parent].children.append(idx) - dmap[idx] = _HC._MainTextItemNode(parent=parent) - # have all children register locally - li = idx + 1 - while ( - li < len(doc.main_text) - and _HC._norm(doc.main_text[li].name) - == _HC._NodeName.LIST_ITEM - ): - dmap[idx].children.append(li) - dmap[li] = _HC._MainTextItemNode(parent=idx) - li += 1 - idx = li - continue + excluded_embed: ClassVar[list[str]] = [_KEY_DOC_ITEMS] + excluded_llm: ClassVar[list[str]] = [_KEY_DOC_ITEMS] - # normal case - else: - if parent is not None: - dmap[parent].children.append(idx) - dmap[idx] = _HC._MainTextItemNode(parent=parent) + def export_json_dict(self) -> dict[str, Any]: + """Helper method for exporting non-None keys to JSON mode. - idx += 1 - else: - pass - return cls( - dmap=dmap, - glob=glob, - ) + Returns: + dict[str, Any]: The exported dictionary. + """ + return self.model_dump(mode="json", by_alias=True, exclude_none=True) - class _TextEntry(BaseModel): - text: str - path: str - def _build_chunk_impl( - self, doc: DLDocument, doc_map: _DocContext, idx: int, rec: bool = False - ) -> tuple[list[_TextEntry], Optional[str]]: - if doc.main_text: - item = doc.main_text[idx] - item_type = _HC._norm(item.obj_type) - item_name = _HC._norm(item.name) - if ( - item_type not in self._allowed_types - or item_name in self._disallowed_names_by_type.get(item_type, []) - ): - return [], None +class Chunk(BaseChunk): + """Data model for specific chunk.""" - c2p = doc_map.dmap + meta: ChunkMeta - text_entries: list[_HC._TextEntry] = [] - if ( - isinstance(item, Ref) - and item_type == _HC._NodeType.TABLE - and doc.tables - ): - # resolve table reference - ref_nr = int(item.ref.split("/")[2]) # e.g. '#/tables/0' - table = doc.tables[ref_nr] - ser_out = _HC._triplet_serialize(table) - if table.data: - text_entries = ( - [ - self._TextEntry( - text=ser_out, - path=self._create_path(idx), - ) - ] - if ser_out - else [] - ) - else: - return [], None - elif isinstance(item, BaseText): - text_entries = [ - self._TextEntry( - text=item.text, - path=self._create_path(idx), - ) - ] - - # squash in any children of type list-item - if not rec: - if ( - c2p[idx].children - and _HC._norm(doc.main_text[c2p[idx].children[0]].name) - == _HC._NodeName.LIST_ITEM - ): - text_entries = text_entries + [ - self._TextEntry( - text=doc.main_text[c].text, # type: ignore[union-attr] - path=self._create_path(c), - ) - for c in c2p[idx].children - if isinstance(doc.main_text[c], BaseText) - and _HC._norm(doc.main_text[c].name) == _HC._NodeName.LIST_ITEM - ] - elif item_name in [ - _HC._NodeName.LIST_ITEM, - _HC._NodeName.SUBTITLE_LEVEL_1, - ]: - return [], None + def export_json_dict(self) -> dict[str, Any]: + """Helper method for exporting non-None keys to JSON mode. - if (parent := c2p[idx].parent) is not None: - # prepend with ancestors + Returns: + dict[str, Any]: The exported dictionary. + """ + return self.model_dump(mode="json", by_alias=True, exclude_none=True) - parent_res = self._build_chunk_impl( - doc=doc, doc_map=doc_map, idx=parent, rec=True - ) - return ( - parent_res[0] + text_entries, # expanded text - parent_res[1], # heading - ) - else: - if ( - self.heading_as_metadata - and isinstance(item, BaseText) - and _HC._norm(item.obj_type) == _HC._NodeType.SUBTITLE_LEVEL_1 - ): - return [], text_entries[0].text - else: - return text_entries, None - else: - return [], None - def _build_chunk( - self, - doc: DLDocument, - doc_map: _DocContext, - idx: int, - delim: str, - rec: bool = False, - ) -> Optional[Chunk]: - res = self._build_chunk_impl(doc=doc, doc_map=doc_map, idx=idx, rec=rec) - texts = res[0] - heading = res[1] - concat = delim.join([t.text for t in texts if t.text]) - assert doc.main_text is not None - if len(concat) >= self.min_chunk_len: - orig_item = doc.main_text[idx] - item: Union[BaseText, Table] - if isinstance(orig_item, Ref): - if _HC._norm(orig_item.obj_type) == _HC._NodeType.TABLE and doc.tables: - pos = int(orig_item.ref.split("/")[2]) - item = doc.tables[pos] - path = self._create_path(pos, path_prefix="tables") - else: # currently disregarding non-table references - return None - else: - item = orig_item - path = self._create_path(idx) +class HierarchicalChunker(BaseChunker): + """Chunker implementation leveraging the document layout.""" - if self.include_metadata: - return ChunkWithMetadata( - text=concat, - path=path, - heading=heading, - page=item.prov[0].page if item.prov else None, - bbox=item.prov[0].bbox if item.prov else None, - ) - else: - return Chunk( - text=concat, - path=path, - heading=heading, - ) - else: - return None + merge_list_items: bool = True + delim: str = "\n" - def chunk(self, dl_doc: DLDocument, delim="\n", **kwargs: Any) -> Iterator[Chunk]: + def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[BaseChunk]: r"""Chunk the provided document. Args: dl_doc (DLDocument): document to chunk - delim (str, optional): delimiter to use when concatenating sub-items. - Defaults to "\n". Yields: Iterator[Chunk]: iterator over extracted chunks """ - if dl_doc.main_text: - # extract doc structure incl. metadata for - # each item (e.g. parent, children) - doc_ctx = self._DocContext.from_doc(doc=dl_doc) - _logger.debug(f"{doc_ctx.model_dump()=}") + heading_by_level: dict[LevelNumber, str] = {} + list_items: list[TextItem] = [] + for item, level in dl_doc.iterate_items(): + + if isinstance(item, DocItem): + + if self.merge_list_items: + if isinstance( + item, ListItem + ) or ( # TODO remove when all captured as ListItem: + isinstance(item, TextItem) + and item.label == DocItemLabel.LIST_ITEM + ): + list_items.append(item) + continue + elif list_items: # need to yield + yield Chunk( + text=self.delim.join([i.text for i in list_items]), + meta=ChunkMeta( + doc_items=list_items, + headings=[ + heading_by_level[k] + for k in sorted(heading_by_level) + ] + or None, + ), + ) + list_items = [] # reset - for i, item in enumerate(dl_doc.main_text): - if ( - isinstance(item, BaseText) - or _HC._norm(item.obj_type) == _HC._NodeType.TABLE + if isinstance( + item, SectionHeaderItem + ) or ( # TODO remove when all captured as SectionHeaderItem: + isinstance(item, TextItem) + and item.label == DocItemLabel.SECTION_HEADER ): - chunk = self._build_chunk( - doc=dl_doc, doc_map=doc_ctx, idx=i, delim=delim - ) - if chunk: - _logger.info(f"{i=}, {chunk=}") - yield chunk - - -_HC = HierarchicalChunker + # TODO second branch not needed after cleanup above: + level = item.level if isinstance(item, SectionHeaderItem) else 1 + heading_by_level[level] = item.text + + # remove headings of higher level as they just went out of scope + keys_to_del = [k for k in heading_by_level if k > level] + for k in keys_to_del: + heading_by_level.pop(k, None) + continue + + if isinstance(item, TextItem) or ( + (not self.merge_list_items) and isinstance(item, ListItem) + ): + text = item.text + else: + continue # TODO refine to ignore some cases & raise otherwise? + c = Chunk( + text=text, + meta=ChunkMeta( + doc_items=[item], + headings=[heading_by_level[k] for k in sorted(heading_by_level)] + or None, + ), + ) + yield c + + if self.merge_list_items and list_items: # need to yield + yield Chunk( + text=self.delim.join([i.text for i in list_items]), + meta=ChunkMeta( + doc_items=list_items, + headings=[heading_by_level[k] for k in sorted(heading_by_level)] + or None, + ), + ) diff --git a/docling_core/transforms/id_generator/__init__.py b/docling_core/transforms/id_generator/__init__.py index 45ae74f3..8eb8d3c8 100644 --- a/docling_core/transforms/id_generator/__init__.py +++ b/docling_core/transforms/id_generator/__init__.py @@ -6,7 +6,4 @@ """Define the ID generator types.""" from docling_core.transforms.id_generator.base import BaseIDGenerator # noqa -from docling_core.transforms.id_generator.doc_hash_id_generator import ( # noqa - DocHashIDGenerator, -) from docling_core.transforms.id_generator.uuid_generator import UUIDGenerator # noqa diff --git a/docling_core/transforms/id_generator/base.py b/docling_core/transforms/id_generator/base.py index f6498a01..74cc1533 100644 --- a/docling_core/transforms/id_generator/base.py +++ b/docling_core/transforms/id_generator/base.py @@ -8,7 +8,7 @@ from abc import ABC, abstractmethod from typing import Any -from docling_core.types import Document as DLDocument +from docling_core.types.doc import DoclingDocument as DLDocument class BaseIDGenerator(ABC): diff --git a/docling_core/transforms/id_generator/doc_hash_id_generator.py b/docling_core/transforms/id_generator/doc_hash_id_generator.py deleted file mode 100644 index deabb057..00000000 --- a/docling_core/transforms/id_generator/doc_hash_id_generator.py +++ /dev/null @@ -1,27 +0,0 @@ -# -# Copyright IBM Corp. 2024 - 2024 -# SPDX-License-Identifier: MIT -# - -"""Doc-hash-based ID generator module.""" - - -from typing import Any - -from docling_core.transforms.id_generator import BaseIDGenerator -from docling_core.types import Document as DLDocument - - -class DocHashIDGenerator(BaseIDGenerator): - """Doc-hash-based ID generator class.""" - - def generate_id(self, doc: DLDocument, *args: Any, **kwargs: Any) -> str: - """Generate an ID for the given document. - - Args: - doc (DLDocument): document to generate ID for - - Returns: - str: the generated ID - """ - return doc.file_info.document_hash diff --git a/docling_core/transforms/id_generator/uuid_generator.py b/docling_core/transforms/id_generator/uuid_generator.py index dda448e8..8e8dcd90 100644 --- a/docling_core/transforms/id_generator/uuid_generator.py +++ b/docling_core/transforms/id_generator/uuid_generator.py @@ -12,7 +12,7 @@ from pydantic import BaseModel, Field from docling_core.transforms.id_generator import BaseIDGenerator -from docling_core.types import Document as DLDocument +from docling_core.types.doc import DoclingDocument as DLDocument class UUIDGenerator(BaseModel, BaseIDGenerator): diff --git a/docling_core/transforms/metadata_extractor/__init__.py b/docling_core/transforms/metadata_extractor/__init__.py index 6c45b296..fc44f7b2 100644 --- a/docling_core/transforms/metadata_extractor/__init__.py +++ b/docling_core/transforms/metadata_extractor/__init__.py @@ -8,6 +8,3 @@ from docling_core.transforms.metadata_extractor.base import ( # noqa BaseMetadataExtractor, ) -from docling_core.transforms.metadata_extractor.simple_metadata_extractor import ( # noqa - SimpleMetadataExtractor, -) diff --git a/docling_core/transforms/metadata_extractor/base.py b/docling_core/transforms/metadata_extractor/base.py index 4385a1ed..6e846785 100644 --- a/docling_core/transforms/metadata_extractor/base.py +++ b/docling_core/transforms/metadata_extractor/base.py @@ -11,7 +11,7 @@ from pydantic import BaseModel -from docling_core.types import Document as DLDocument +from docling_core.types.doc import DoclingDocument as DLDocument class BaseMetadataExtractor(BaseModel, ABC): diff --git a/docling_core/transforms/metadata_extractor/simple_metadata_extractor.py b/docling_core/transforms/metadata_extractor/simple_metadata_extractor.py deleted file mode 100644 index 3ffac033..00000000 --- a/docling_core/transforms/metadata_extractor/simple_metadata_extractor.py +++ /dev/null @@ -1,59 +0,0 @@ -# -# Copyright IBM Corp. 2024 - 2024 -# SPDX-License-Identifier: MIT -# - -"""Simple metadata extractor module.""" - - -from typing import Any, Final - -from docling_core.transforms.metadata_extractor import BaseMetadataExtractor -from docling_core.types import Document as DLDocument - -_DL_DOC_HASH: Final[str] = "dl_doc_hash" -_ORIGIN: Final[str] = "origin" - - -class SimpleMetadataExtractor(BaseMetadataExtractor): - """Simple metadata extractor class.""" - - include_origin: bool = False - - def get_metadata( - self, doc: DLDocument, origin: str, *args: Any, **kwargs: Any - ) -> dict[str, Any]: - """Extract metadata for the given document. - - Args: - doc (DLDocument): document to extract metadata for - origin (str): the document origin - - Returns: - dict[str, Any]: the extracted metadata - """ - meta: dict[str, Any] = { - _DL_DOC_HASH: doc.file_info.document_hash, - } - if self.include_origin: - meta[_ORIGIN] = origin - return meta - - def get_excluded_embed_metadata_keys(self) -> list[str]: - """Get metadata keys to exclude from embedding. - - Returns: - list[str]: the metadata to exclude - """ - excl_keys: list[str] = [_DL_DOC_HASH] - if self.include_origin: - excl_keys.append(_ORIGIN) - return excl_keys - - def get_excluded_llm_metadata_keys(self) -> list[str]: - """Get metadata keys to exclude from LLM generation. - - Returns: - list[str]: the metadata to exclude - """ - return self.get_excluded_embed_metadata_keys() diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index 3ff9b421..95e81d39 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -29,7 +29,8 @@ from docling_core.types.legacy_doc.tokens import DocumentToken Uint64 = typing.Annotated[int, Field(ge=0, le=(2**64 - 1))] -LevelNumber = typing.Annotated[int, Field(ge=1, le=100)] +MAX_LEVEL_NR = 100 +LevelNumber = typing.Annotated[int, Field(ge=1, le=MAX_LEVEL_NR)] CURRENT_VERSION: Final = "1.0.0" DEFAULT_EXPORT_LABELS = { diff --git a/test/data/chunker/0_inp_dl_doc.json b/test/data/chunker/0_inp_dl_doc.json index f94d4993..fcd336f8 100644 --- a/test/data/chunker/0_inp_dl_doc.json +++ b/test/data/chunker/0_inp_dl_doc.json @@ -1,503 +1,17307 @@ { - "name": "", - "description": { - "logs": [] + "schema_name": "DoclingDocument", + "version": "1.0.0", + "description": {}, + "name": "2408.09869v3", + "origin": { + "mimetype": "application/pdf", + "binary_hash": 14981478401387673002, + "filename": "2408.09869v3.pdf" }, - "main_text": [ + "furniture": { + "self_ref": "#/furniture", + "children": [], + "name": "_root_", + "label": "unspecified" + }, + "body": { + "self_ref": "#/body", + "children": [ + { + "$ref": "#/texts/0" + }, + { + "$ref": "#/pictures/0" + }, + { + "$ref": "#/texts/1" + }, + { + "$ref": "#/texts/2" + }, + { + "$ref": "#/texts/3" + }, + { + "$ref": "#/texts/4" + }, + { + "$ref": "#/texts/5" + }, + { + "$ref": "#/texts/6" + }, + { + "$ref": "#/texts/7" + }, + { + "$ref": "#/texts/8" + }, + { + "$ref": "#/texts/9" + }, + { + "$ref": "#/texts/10" + }, + { + "$ref": "#/texts/11" + }, + { + "$ref": "#/texts/12" + }, + { + "$ref": "#/texts/13" + }, + { + "$ref": "#/texts/14" + }, + { + "$ref": "#/texts/15" + }, + { + "$ref": "#/texts/16" + }, + { + "$ref": "#/texts/17" + }, + { + "$ref": "#/texts/18" + }, + { + "$ref": "#/texts/19" + }, + { + "$ref": "#/texts/20" + }, + { + "$ref": "#/texts/21" + }, + { + "$ref": "#/texts/22" + }, + { + "$ref": "#/texts/23" + }, + { + "$ref": "#/texts/24" + }, + { + "$ref": "#/texts/25" + }, + { + "$ref": "#/texts/26" + }, + { + "$ref": "#/texts/27" + }, + { + "$ref": "#/texts/28" + }, + { + "$ref": "#/texts/29" + }, + { + "$ref": "#/texts/30" + }, + { + "$ref": "#/pictures/1" + }, + { + "$ref": "#/texts/31" + }, + { + "$ref": "#/texts/32" + }, + { + "$ref": "#/texts/33" + }, + { + "$ref": "#/texts/34" + }, + { + "$ref": "#/texts/35" + }, + { + "$ref": "#/texts/36" + }, + { + "$ref": "#/texts/37" + }, + { + "$ref": "#/texts/38" + }, + { + "$ref": "#/texts/39" + }, + { + "$ref": "#/texts/40" + }, + { + "$ref": "#/texts/41" + }, + { + "$ref": "#/texts/42" + }, + { + "$ref": "#/texts/43" + }, + { + "$ref": "#/texts/44" + }, + { + "$ref": "#/texts/45" + }, + { + "$ref": "#/texts/46" + }, + { + "$ref": "#/texts/47" + }, + { + "$ref": "#/texts/48" + }, + { + "$ref": "#/texts/49" + }, + { + "$ref": "#/texts/50" + }, + { + "$ref": "#/texts/51" + }, + { + "$ref": "#/texts/52" + }, + { + "$ref": "#/texts/53" + }, + { + "$ref": "#/texts/54" + }, + { + "$ref": "#/texts/55" + }, + { + "$ref": "#/texts/56" + }, + { + "$ref": "#/tables/0" + }, + { + "$ref": "#/texts/57" + }, + { + "$ref": "#/texts/58" + }, + { + "$ref": "#/texts/59" + }, + { + "$ref": "#/texts/60" + }, + { + "$ref": "#/texts/61" + }, + { + "$ref": "#/texts/62" + }, + { + "$ref": "#/texts/63" + }, + { + "$ref": "#/texts/64" + }, + { + "$ref": "#/texts/65" + }, + { + "$ref": "#/texts/66" + }, + { + "$ref": "#/texts/67" + }, + { + "$ref": "#/texts/68" + }, + { + "$ref": "#/texts/69" + }, + { + "$ref": "#/texts/70" + }, + { + "$ref": "#/texts/71" + }, + { + "$ref": "#/texts/72" + }, + { + "$ref": "#/texts/73" + }, + { + "$ref": "#/texts/74" + }, + { + "$ref": "#/texts/75" + }, + { + "$ref": "#/texts/76" + }, + { + "$ref": "#/texts/77" + }, + { + "$ref": "#/texts/78" + }, + { + "$ref": "#/texts/79" + }, + { + "$ref": "#/texts/80" + }, + { + "$ref": "#/texts/81" + }, + { + "$ref": "#/texts/82" + }, + { + "$ref": "#/texts/83" + }, + { + "$ref": "#/texts/84" + }, + { + "$ref": "#/texts/85" + }, + { + "$ref": "#/texts/86" + }, + { + "$ref": "#/texts/87" + }, + { + "$ref": "#/texts/88" + }, + { + "$ref": "#/texts/89" + }, + { + "$ref": "#/texts/90" + }, + { + "$ref": "#/texts/91" + }, + { + "$ref": "#/texts/92" + }, + { + "$ref": "#/texts/93" + }, + { + "$ref": "#/texts/94" + }, + { + "$ref": "#/texts/95" + }, + { + "$ref": "#/texts/96" + }, + { + "$ref": "#/texts/97" + }, + { + "$ref": "#/texts/98" + }, + { + "$ref": "#/texts/99" + }, + { + "$ref": "#/texts/100" + }, + { + "$ref": "#/texts/101" + }, + { + "$ref": "#/texts/102" + }, + { + "$ref": "#/texts/103" + }, + { + "$ref": "#/texts/104" + }, + { + "$ref": "#/texts/105" + }, + { + "$ref": "#/texts/106" + }, + { + "$ref": "#/texts/107" + }, + { + "$ref": "#/texts/108" + }, + { + "$ref": "#/texts/109" + }, + { + "$ref": "#/texts/110" + }, + { + "$ref": "#/texts/111" + }, + { + "$ref": "#/texts/112" + }, + { + "$ref": "#/pictures/2" + }, + { + "$ref": "#/pictures/3" + }, + { + "$ref": "#/pictures/4" + }, + { + "$ref": "#/pictures/5" + }, + { + "$ref": "#/texts/113" + }, + { + "$ref": "#/texts/114" + }, + { + "$ref": "#/texts/115" + }, + { + "$ref": "#/texts/116" + }, + { + "$ref": "#/texts/117" + }, + { + "$ref": "#/texts/118" + }, + { + "$ref": "#/texts/119" + }, + { + "$ref": "#/texts/120" + }, + { + "$ref": "#/texts/121" + }, + { + "$ref": "#/texts/122" + }, + { + "$ref": "#/tables/1" + }, + { + "$ref": "#/texts/123" + }, + { + "$ref": "#/texts/124" + }, + { + "$ref": "#/texts/125" + }, + { + "$ref": "#/texts/126" + }, + { + "$ref": "#/pictures/6" + }, + { + "$ref": "#/texts/127" + }, + { + "$ref": "#/texts/128" + }, + { + "$ref": "#/texts/129" + }, + { + "$ref": "#/texts/130" + }, + { + "$ref": "#/texts/131" + }, + { + "$ref": "#/texts/132" + }, + { + "$ref": "#/tables/2" + }, + { + "$ref": "#/texts/133" + }, + { + "$ref": "#/texts/134" + }, + { + "$ref": "#/texts/135" + }, + { + "$ref": "#/texts/136" + }, + { + "$ref": "#/texts/137" + }, + { + "$ref": "#/texts/138" + }, + { + "$ref": "#/texts/139" + }, + { + "$ref": "#/texts/140" + }, + { + "$ref": "#/texts/141" + }, + { + "$ref": "#/texts/142" + }, + { + "$ref": "#/texts/143" + }, + { + "$ref": "#/tables/3" + }, + { + "$ref": "#/texts/144" + }, + { + "$ref": "#/pictures/7" + }, + { + "$ref": "#/pictures/8" + }, + { + "$ref": "#/texts/145" + }, + { + "$ref": "#/pictures/9" + }, + { + "$ref": "#/texts/146" + }, + { + "$ref": "#/texts/147" + }, + { + "$ref": "#/texts/148" + }, + { + "$ref": "#/texts/149" + } + ], + "name": "_root_", + "label": "unspecified" + }, + "groups": [], + "texts": [ { - "text": "This paragraph is marginally long enough for getting accepted as a chunk.", - "type": "paragraph", - "name": "Text", + "self_ref": "#/texts/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_header", "prov": [ { - "bbox": [ + "page_no": 1, + "bbox": { + "l": 17.088111877441406, + "t": 583.2296752929688, + "r": 36.339778900146484, + "b": 231.99996948242188, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ 0, - 1, - 2, - 3 - ], - "page": 1, - "span": [ + 38 + ] + } + ], + "orig": "arXiv:2408.09869v3 [cs.CL] 30 Aug 2024", + "text": "arXiv:2408.09869v3 [cs.CL] 30 Aug 2024" + }, + { + "self_ref": "#/texts/1", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 211.79823303222656, + "t": 567.349365234375, + "r": 399.41156005859375, + "b": 550.5603637695312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ 0, - 1 + 24 ] } - ] + ], + "orig": "Docling Technical Report", + "text": "Docling Technical Report" }, { - "text": "This one is too short to be accepted as a chunk.", - "type": "paragraph", - "name": "Text", + "self_ref": "#/texts/2", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", "prov": [ { - "bbox": [ - 1, - 2, - 3, - 4 - ], - "page": 1, - "span": [ + "page_no": 1, + "bbox": { + "l": 282.772216796875, + "t": 512.7218017578125, + "r": 328.8624572753906, + "b": 503.340087890625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ 0, - 1 + 11 ] } - ] + ], + "orig": "Version 1.0", + "text": "Version 1.0" }, { - "text": "Some subtitle", - "type": "subtitle-level-1", - "name": "Section-header", + "self_ref": "#/texts/3", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", "prov": [ { - "bbox": [ - 3, - 4, - 5, - 6 - ], - "page": 2, - "span": [ + "page_no": 1, + "bbox": { + "l": 113.4512939453125, + "t": 482.4101257324219, + "r": 498.396728515625, + "b": 439.45928955078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ 0, - 1 + 295 ] } - ] + ], + "orig": "Christoph Auer Maksym Lysak Ahmed Nassar Michele Dolfi Nikolaos Livathinos Panos Vagenas Cesar Berrospi Ramis Matteo Omenetti Fabian Lindlbauer Kasper Dinkla Lokesh Mishra Yusik Kim Shubham Gupta Rafael Teixeira de Lima Valery Weber Lucas Morin Ingmar Meijer Viktor Kuropiatnyk Peter W. J. Staar", + "text": "Christoph Auer Maksym Lysak Ahmed Nassar Michele Dolfi Nikolaos Livathinos Panos Vagenas Cesar Berrospi Ramis Matteo Omenetti Fabian Lindlbauer Kasper Dinkla Lokesh Mishra Yusik Kim Shubham Gupta Rafael Teixeira de Lima Valery Weber Lucas Morin Ingmar Meijer Viktor Kuropiatnyk Peter W. J. Staar" }, { - "text": "Still too short, despite the subtitle above...", - "type": "paragraph", - "name": "Text", + "self_ref": "#/texts/4", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", "prov": [ { - "bbox": [ - 4, - 5, - 6, - 7 - ], - "page": 2, - "span": [ + "page_no": 1, + "bbox": { + "l": 248.43727111816406, + "t": 428.638427734375, + "r": 362.8905029296875, + "b": 407.99810791015625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ 0, - 1 + 48 ] } - ] + ], + "orig": "AI4K Group, IBM Research Ruschlikon, Switzerland", + "text": "AI4K Group, IBM Research Ruschlikon, Switzerland" }, { - "text": "This one should also include the subtitle above since it is long enough.", - "type": "paragraph", - "name": "Text", + "self_ref": "#/texts/5", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "level": 2, "prov": [ { - "bbox": [ - 5, - 6, - 7, + "page_no": 1, + "bbox": { + "l": 283.0529479980469, + "t": 393.8207092285156, + "r": 328.2636413574219, + "b": 382.4114074707031, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, 8 - ], - "page": 3, - "span": [ + ] + } + ], + "orig": "Abstract", + "text": "Abstract" + }, + { + "self_ref": "#/texts/6", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 142.92593383789062, + "t": 364.814697265625, + "r": 468.3847351074219, + "b": 300.651123046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ 0, - 1 + 431 ] } - ] + ], + "orig": "This technical report introduces Docling , an easy to use, self-contained, MITlicensed open-source package for PDF document conversion. It is powered by state-of-the-art specialized AI models for layout analysis (DocLayNet) and table structure recognition (TableFormer), and runs efficiently on commodity hardware in a small resource budget. The code interface allows for easy extensibility and addition of new features and models.", + "text": "This technical report introduces Docling , an easy to use, self-contained, MITlicensed open-source package for PDF document conversion. It is powered by state-of-the-art specialized AI models for layout analysis (DocLayNet) and table structure recognition (TableFormer), and runs efficiently on commodity hardware in a small resource budget. The code interface allows for easy extensibility and addition of new features and models." }, { - "text": "Acquisitions", - "type": "subtitle-level-1", - "name": "Section-header", + "self_ref": "#/texts/7", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", "prov": [ { - "bbox": [ - 6, - 7, - 8, - 9 - ], - "page": 3, - "span": [ + "page_no": 1, + "bbox": { + "l": 107.9176254272461, + "t": 268.3304443359375, + "r": 190.81365966796875, + "b": 257.0544128417969, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ 0, - 1 + 14 ] } - ] + ], + "orig": "1 Introduction", + "text": "1 Introduction" }, { - "type": "table", - "name": "Table", - "$ref": "#/tables/0" + "self_ref": "#/texts/8", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 106.98738098144531, + "t": 240.2642822265625, + "r": 504.3785400390625, + "b": 142.53631591796875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 792 + ] + } + ], + "orig": "Converting PDF documents back into a machine-processable format has been a major challenge for decades due to their huge variability in formats, weak standardization and printing-optimized characteristic, which discards most structural features and metadata. With the advent of LLMs and popular application patterns such as retrieval-augmented generation (RAG), leveraging the rich content embedded in PDFs has become ever more relevant. In the past decade, several powerful document understanding solutions have emerged on the market, most of which are commercial software, cloud offerings [3] and most recently, multi-modal vision-language models. As of today, only a handful of open-source tools cover PDF conversion, leaving a significant feature and quality gap to proprietary solutions.", + "text": "Converting PDF documents back into a machine-processable format has been a major challenge for decades due to their huge variability in formats, weak standardization and printing-optimized characteristic, which discards most structural features and metadata. With the advent of LLMs and popular application patterns such as retrieval-augmented generation (RAG), leveraging the rich content embedded in PDFs has become ever more relevant. In the past decade, several powerful document understanding solutions have emerged on the market, most of which are commercial software, cloud offerings [3] and most recently, multi-modal vision-language models. As of today, only a handful of open-source tools cover PDF conversion, leaving a significant feature and quality gap to proprietary solutions." }, { - "text": "This paragraph should actually include the latest subtitle.", - "type": "paragraph", - "name": "Text", + "self_ref": "#/texts/9", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", "prov": [ { - "bbox": [ - 7, - 8, - 9, - 10 - ], - "page": 4, - "span": [ + "page_no": 1, + "bbox": { + "l": 107.0031967163086, + "t": 136.7283935546875, + "r": 504.04998779296875, + "b": 83.30133056640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ 0, - 1 + 488 ] } - ] + ], + "orig": "With Docling , we open-source a very capable and efficient document conversion tool which builds on the powerful, specialized AI models and datasets for layout analysis and table structure recognition we developed and presented in the recent past [12, 13, 9]. Docling is designed as a simple, self-contained python library with permissive license, running entirely locally on commodity hardware. Its code architecture allows for easy extensibility and addition of new features and models.", + "text": "With Docling , we open-source a very capable and efficient document conversion tool which builds on the powerful, specialized AI models and datasets for layout analysis and table structure recognition we developed and presented in the recent past [12, 13, 9]. Docling is designed as a simple, self-contained python library with permissive license, running entirely locally on commodity hardware. Its code architecture allows for easy extensibility and addition of new features and models." }, { - "text": "This paragraph is right before the list.", - "type": "paragraph", - "name": "Text", + "self_ref": "#/texts/10", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", "prov": [ { - "bbox": [ - 8, - 9, - 10, - 11 - ], - "page": 4, - "span": [ + "page_no": 1, + "bbox": { + "l": 107.10411071777344, + "t": 58.48394775390625, + "r": 200.8249969482422, + "b": 49.8505859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ 0, - 1 + 24 ] } - ] + ], + "orig": "Docling Technical Report", + "text": "Docling Technical Report" }, { - "text": "Some first bullet content here.", - "type": "paragraph", - "name": "List-item", + "self_ref": "#/texts/11", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", "prov": [ { - "bbox": [ - 9, - 10, - 11, - 12 - ], - "page": 4, - "span": [ + "page_no": 1, + "bbox": { + "l": 303.50897216796875, + "t": 49.50579833984375, + "r": 308.4902648925781, + "b": 39.960147857666016, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ 0, 1 ] } - ] + ], + "orig": "1", + "text": "1" }, { - "text": "And then some second bullet content here.", - "type": "paragraph", - "name": "List-item", + "self_ref": "#/texts/12", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", "prov": [ { - "bbox": [ - 10, - 11, - 12, - 13 - ], - "page": 4, - "span": [ + "page_no": 2, + "bbox": { + "l": 107.48941802978516, + "t": 717.5628662109375, + "r": 253.97195434570312, + "b": 707.6951293945312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 36 + ] + } + ], + "orig": "Here is what Docling delivers today:", + "text": "Here is what Docling delivers today:" + }, + { + "self_ref": "#/texts/13", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.6504669189453, + "t": 696.156494140625, + "r": 468.3969421386719, + "b": 686.3217163085938, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 78 + ] + } + ], + "orig": "\u00b7 Converts PDF documents to JSON or Markdown format, stable and lightning fast", + "text": "\u00b7 Converts PDF documents to JSON or Markdown format, stable and lightning fast" + }, + { + "self_ref": "#/texts/14", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.72218322753906, + "t": 681.3009643554688, + "r": 504.0032653808594, + "b": 660.819091796875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 96 + ] + } + ], + "orig": "\u00b7 Understands detailed page layout, reading order, locates figures and recovers table structures", + "text": "\u00b7 Understands detailed page layout, reading order, locates figures and recovers table structures" + }, + { + "self_ref": "#/texts/15", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.9065399169922, + "t": 655.3751220703125, + "r": 480.8502502441406, + "b": 645.7429809570312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 86 + ] + } + ], + "orig": "\u00b7 Extracts metadata from the document, such as title, authors, references and language", + "text": "\u00b7 Extracts metadata from the document, such as title, authors, references and language" + }, + { + "self_ref": "#/texts/16", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.8793487548828, + "t": 640.9143676757812, + "r": 333.46343994140625, + "b": 630.7002563476562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 47 + ] + } + ], + "orig": "\u00b7 Optionally applies OCR, e.g. for scanned PDFs", + "text": "\u00b7 Optionally applies OCR, e.g. for scanned PDFs" + }, + { + "self_ref": "#/texts/17", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 135.0067901611328, + "t": 626.0984497070312, + "r": 504.003173828125, + "b": 604.8719482421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 161 + ] + } + ], + "orig": "\u00b7 Can be configured to be optimal for batch-mode (i.e high throughput, low time-to-solution) or interactive mode (compromise on efficiency, low time-to-solution)", + "text": "\u00b7 Can be configured to be optimal for batch-mode (i.e high throughput, low time-to-solution) or interactive mode (compromise on efficiency, low time-to-solution)" + }, + { + "self_ref": "#/texts/18", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.7841339111328, + "t": 600.127685546875, + "r": 355.41107177734375, + "b": 590.395751953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 54 + ] + } + ], + "orig": "\u00b7 Can leverage different accelerators (GPU, MPS, etc).", + "text": "\u00b7 Can leverage different accelerators (GPU, MPS, etc)." + }, + { + "self_ref": "#/texts/19", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.69728088378906, + "t": 573.0997314453125, + "r": 205.29141235351562, + "b": 561.1637573242188, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 17 + ] + } + ], + "orig": "2 Getting Started", + "text": "2 Getting Started" + }, + { + "self_ref": "#/texts/20", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.22560119628906, + "t": 548.7847900390625, + "r": 504.00341796875, + "b": 506.27606201171875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 321 + ] + } + ], + "orig": "To use Docling, you can simply install the docling package from PyPI. Documentation and examples are available in our GitHub repository at github.com/DS4SD/docling. All required model assets 1 are downloaded to a local huggingface datasets cache on first use, unless you choose to pre-install the model assets in advance.", + "text": "To use Docling, you can simply install the docling package from PyPI. Documentation and examples are available in our GitHub repository at github.com/DS4SD/docling. All required model assets 1 are downloaded to a local huggingface datasets cache on first use, unless you choose to pre-install the model assets in advance." + }, + { + "self_ref": "#/texts/21", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.38473510742188, + "t": 499.5434875488281, + "r": 504.0034484863281, + "b": 456.7132263183594, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 371 + ] + } + ], + "orig": "Docling provides an easy code interface to convert PDF documents from file system, URLs or binary streams, and retrieve the output in either JSON or Markdown format. For convenience, separate methods are offered to convert single documents or batches of documents. A basic usage example is illustrated below. Further examples are available in the Doclign code repository.", + "text": "Docling provides an easy code interface to convert PDF documents from file system, URLs or binary streams, and retrieve the output in either JSON or Markdown format. For convenience, separate methods are offered to convert single documents or batches of documents. A basic usage example is illustrated below. Further examples are available in the Doclign code repository." + }, + { + "self_ref": "#/texts/22", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "code", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.45667266845703, + "t": 449.7299499511719, + "r": 491.58642578125, + "b": 380.3858642578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 327 + ] + } + ], + "orig": "from docling.document_converter import DocumentConverter source = \"https :// arxiv.org/pdf /2206.01062\" # PDF path or URL converter = DocumentConverter () result = converter.convert_single(source) print(result.render_as_markdown ()) # output: \"## DocLayNet: A Large Human -Annotated Dataset for Document -Layout Analysis [...]\"", + "text": "from docling.document_converter import DocumentConverter source = \"https :// arxiv.org/pdf /2206.01062\" # PDF path or URL converter = DocumentConverter () result = converter.convert_single(source) print(result.render_as_markdown ()) # output: \"## DocLayNet: A Large Human -Annotated Dataset for Document -Layout Analysis [...]\"" + }, + { + "self_ref": "#/texts/23", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.32361602783203, + "t": 368.8786926269531, + "r": 504.3451843261719, + "b": 315.56304931640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 403 + ] + } + ], + "orig": "Optionally, you can configure custom pipeline features and runtime options, such as turning on or off features (e.g. OCR, table structure recognition), enforcing limits on the input document size, and defining the budget of CPU threads. Advanced usage examples and options are documented in the README file. Docling also provides a Dockerfile to demonstrate how to install and run it inside a container.", + "text": "Optionally, you can configure custom pipeline features and runtime options, such as turning on or off features (e.g. OCR, table structure recognition), enforcing limits on the input document size, and defining the budget of CPU threads. Advanced usage examples and options are documented in the README file. Docling also provides a Dockerfile to demonstrate how to install and run it inside a container." + }, + { + "self_ref": "#/texts/24", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.57711029052734, + "t": 298.302734375, + "r": 223.69046020507812, + "b": 286.431884765625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 21 + ] + } + ], + "orig": "3 Processing pipeline", + "text": "3 Processing pipeline" + }, + { + "self_ref": "#/texts/25", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.176025390625, + "t": 273.72723388671875, + "r": 504.06005859375, + "b": 176.83807373046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 796 + ] + } + ], + "orig": "Docling implements a linear pipeline of operations, which execute sequentially on each given document (see Fig. 1). Each document is first parsed by a PDF backend, which retrieves the programmatic text tokens, consisting of string content and its coordinates on the page, and also renders a bitmap image of each page to support downstream operations. Then, the standard model pipeline applies a sequence of AI models independently on every page in the document to extract features and content, such as layout and table structures. Finally, the results from all pages are aggregated and passed through a post-processing stage, which augments metadata, detects the document language, infers reading-order and eventually assembles a typed document object which can be serialized to JSON or Markdown.", + "text": "Docling implements a linear pipeline of operations, which execute sequentially on each given document (see Fig. 1). Each document is first parsed by a PDF backend, which retrieves the programmatic text tokens, consisting of string content and its coordinates on the page, and also renders a bitmap image of each page to support downstream operations. Then, the standard model pipeline applies a sequence of AI models independently on every page in the document to extract features and content, such as layout and table structures. Finally, the results from all pages are aggregated and passed through a post-processing stage, which augments metadata, detects the document language, infers reading-order and eventually assembles a typed document object which can be serialized to JSON or Markdown." + }, + { + "self_ref": "#/texts/26", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.4563980102539, + "t": 162.30242919921875, + "r": 192.2094268798828, + "b": 152.47781372070312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 16 + ] + } + ], + "orig": "3.1 PDF backends", + "text": "3.1 PDF backends" + }, + { + "self_ref": "#/texts/27", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.26972198486328, + "t": 142.07904052734375, + "r": 504.2434997558594, + "b": 87.39227294921875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 487 + ] + } + ], + "orig": "Two basic requirements to process PDF documents in our pipeline are a) to retrieve all text content and their geometric coordinates on each page and b) to render the visual representation of each page as it would appear in a PDF viewer. Both these requirements are encapsulated in Docling's PDF backend interface. While there are several open-source PDF parsing libraries available for python, we faced major obstacles with all of them for different reasons, among which were restrictive", + "text": "Two basic requirements to process PDF documents in our pipeline are a) to retrieve all text content and their geometric coordinates on each page and b) to render the visual representation of each page as it would appear in a PDF viewer. Both these requirements are encapsulated in Docling's PDF backend interface. While there are several open-source PDF parsing libraries available for python, we faced major obstacles with all of them for different reasons, among which were restrictive" + }, + { + "self_ref": "#/texts/28", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "footnote", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 120.65299987792969, + "t": 78.96942138671875, + "r": 276.9403076171875, + "b": 69.9141845703125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 46 + ] + } + ], + "orig": "$^{1}$see huggingface.co/ds4sd/docling-models/", + "text": "$^{1}$see huggingface.co/ds4sd/docling-models/" + }, + { + "self_ref": "#/texts/29", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 302.96832275390625, + "t": 49.7403564453125, + "r": 308.49029541015625, + "b": 39.960079193115234, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ 0, 1 ] } - ] - } - ], - "tables": [ + ], + "orig": "2", + "text": "2" + }, { - "#-cols": 3, - "#-rows": 3, - "confidence": 0.99, - "created_by": "high_conf_pred", - "type": "table", - "cells": [], - "data": [ - [ - { - "bbox": [ - 1, - 2, - 3, - 4 - ], - "col": 0, - "col-header": false, - "col-span": [ - 0, - 1 - ], - "row": 0, - "row-header": false, - "row-span": [ - 0, - 1 - ], - "spans": [ - [ - 0, - 0 - ] - ], - "text": "Ignored", - "type": "" - }, - { - "bbox": [ - 1, - 2, - 3, - 4 - ], - "col": 1, - "col-header": false, - "col-span": [ - 1, - 2 - ], - "row": 0, - "row-header": false, - "row-span": [ - 0, - 1 - ], - "spans": [ - [ - 0, - 1 - ] - ], - "text": "Business", - "type": "" - }, - { - "bbox": [ - 1, - 2, - 3, - 4 - ], - "col": 2, - "col-header": false, - "col-span": [ - 2, - 3 - ], - "row": 0, - "row-header": false, - "row-span": [ - 0, - 1 - ], - "spans": [ - [ - 0, - 2 - ] - ], - "text": "Country", - "type": "" - } - ], - [ - { - "bbox": [ - 1, - 2, - 3, - 4 - ], - "col": 0, - "col-header": false, - "col-span": [ - 0, - 1 - ], - "row": 1, - "row-header": false, - "row-span": [ - 1, - 2 - ], - "spans": [ - [ - 1, - 0 - ] - ], - "text": "Atomic Vision", - "type": "" - }, - { - "bbox": [ - 1, - 2, - 3, - 4 - ], - "col": 1, - "col-header": false, - "col-span": [ - 1, - 2 - ], - "row": 1, - "row-header": false, - "row-span": [ - 1, - 2 - ], - "spans": [ - [ - 1, - 1 - ] - ], - "text": "Website design", - "type": "" - }, - { - "bbox": [ - 1, - 2, - 3, - 4 - ], - "col": 2, - "col-header": false, - "col-span": [ - 2, - 3 - ], - "row": 1, - "row-header": false, - "row-span": [ - 1, - 2 - ], - "spans": [ - [ - 1, - 2 - ] - ], - "text": "United States", - "type": "" - } - ], - [ - { - "bbox": [ - 1, - 2, - 3, - 4 - ], - "col": 0, - "col-header": false, - "col-span": [ - 0, - 1 - ], - "row": 2, - "row-header": false, - "row-span": [ - 2, - 3 - ], - "spans": [ - [ - 2, - 0 - ] - ], - "text": "Delix Computer GmbH", - "type": "" - }, - { - "bbox": [ - 1, - 2, - 3, - 4 - ], - "col": 1, - "col-header": false, - "col-span": [ - 1, - 2 - ], - "row": 2, - "row-header": false, - "row-span": [ - 2, - 3 - ], - "spans": [ - [ - 2, - 1 - ] - ], - "text": "Computers and software", - "type": "" - }, - { - "bbox": [ - 1, - 2, - 3, - 4 - ], - "col": 2, - "col-header": false, - "col-span": [ - 2, - 3 - ], - "row": 2, - "row-header": false, - "row-span": [ - 2, - 3 - ], - "spans": [ - [ - 2, - 2 - ] - ], - "text": "Germany", - "type": "" - } - ] + "self_ref": "#/texts/30", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.11122131347656, + "t": 570.7063598632812, + "r": 504.00335693359375, + "b": 550.3002319335938, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 134 + ] + } + ], + "orig": "Figure 1: Sketch of Docling's default processing pipeline. The inner part of the model pipeline is easily customizable and extensible.", + "text": "Figure 1: Sketch of Docling's default processing pipeline. The inner part of the model pipeline is easily customizable and extensible." + }, + { + "self_ref": "#/texts/31", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.37481689453125, + "t": 525.6080932617188, + "r": 504.0033264160156, + "b": 504.8570861816406, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 173 + ] + } ], - "text": "", + "orig": "licensing (e.g. pymupdf [7]), poor speed or unrecoverable quality issues, such as merged text cells across far-apart text tokens or table columns (pypdfium, PyPDF) [15, 14].", + "text": "licensing (e.g. pymupdf [7]), poor speed or unrecoverable quality issues, such as merged text cells across far-apart text tokens or table columns (pypdfium, PyPDF) [15, 14]." + }, + { + "self_ref": "#/texts/32", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", "prov": [ { - "bbox": [ - 8, - 9, - 10, - 11 - ], - "page": 4, - "span": [ + "page_no": 3, + "bbox": { + "l": 107.10971069335938, + "t": 498.21685791015625, + "r": 504.0033874511719, + "b": 443.9909973144531, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ 0, - 0 + 446 ] } - ] - } - ], - "file_info": { - "filename": "", - "document_hash": "" + ], + "orig": "We therefore decided to provide multiple backend choices, and additionally open-source a custombuilt PDF parser, which is based on the low-level qpdf [4] library. It is made available in a separate package named docling-parse and powers the default PDF backend in Docling. As an alternative, we provide a PDF backend relying on pypdfium , which may be a safe backup choice in certain cases, e.g. if issues are seen with particular font encodings.", + "text": "We therefore decided to provide multiple backend choices, and additionally open-source a custombuilt PDF parser, which is based on the low-level qpdf [4] library. It is made available in a separate package named docling-parse and powers the default PDF backend in Docling. As an alternative, we provide a PDF backend relying on pypdfium , which may be a safe backup choice in certain cases, e.g. if issues are seen with particular font encodings." + }, + { + "self_ref": "#/texts/33", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.3078384399414, + "t": 427.0700378417969, + "r": 173.86279296875, + "b": 417.4698181152344, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 13 + ] + } + ], + "orig": "3.2 AI models", + "text": "3.2 AI models" + }, + { + "self_ref": "#/texts/34", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.07593536376953, + "t": 406.1695251464844, + "r": 504.1148681640625, + "b": 330.2677307128906, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 608 + ] + } + ], + "orig": "As part of Docling, we initially release two highly capable AI models to the open-source community, which have been developed and published recently by our team. The first model is a layout analysis model, an accurate object-detector for page elements [13]. The second model is TableFormer [12, 9], a state-of-the-art table structure recognition model. We provide the pre-trained weights (hosted on huggingface) and a separate package for the inference code as docling-ibm-models . Both models are also powering the open-access deepsearch-experience, our cloud-native service for knowledge exploration tasks.", + "text": "As part of Docling, we initially release two highly capable AI models to the open-source community, which have been developed and published recently by our team. The first model is a layout analysis model, an accurate object-detector for page elements [13]. The second model is TableFormer [12, 9], a state-of-the-art table structure recognition model. We provide the pre-trained weights (hosted on huggingface) and a separate package for the inference code as docling-ibm-models . Both models are also powering the open-access deepsearch-experience, our cloud-native service for knowledge exploration tasks." + }, + { + "self_ref": "#/texts/35", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.62261962890625, + "t": 314.30401611328125, + "r": 206.28106689453125, + "b": 304.31805419921875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 21 + ] + } + ], + "orig": "Layout Analysis Model", + "text": "Layout Analysis Model" + }, + { + "self_ref": "#/texts/36", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.1727294921875, + "t": 294.7471923828125, + "r": 504.1613464355469, + "b": 251.51837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 388 + ] + } + ], + "orig": "Our layout analysis model is an object-detector which predicts the bounding-boxes and classes of various elements on the image of a given page. Its architecture is derived from RT-DETR [16] and re-trained on DocLayNet [13], our popular human-annotated dataset for document-layout analysis, among other proprietary datasets. For inference, our implementation relies on the onnxruntime [5].", + "text": "Our layout analysis model is an object-detector which predicts the bounding-boxes and classes of various elements on the image of a given page. Its architecture is derived from RT-DETR [16] and re-trained on DocLayNet [13], our popular human-annotated dataset for document-layout analysis, among other proprietary datasets. For inference, our implementation relies on the onnxruntime [5]." + }, + { + "self_ref": "#/texts/37", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.23725891113281, + "t": 245.4161376953125, + "r": 504.00347900390625, + "b": 191.62884521484375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 443 + ] + } + ], + "orig": "The Docling pipeline feeds page images at 72 dpi resolution, which can be processed on a single CPU with sub-second latency. All predicted bounding-box proposals for document elements are post-processed to remove overlapping proposals based on confidence and size, and then intersected with the text tokens in the PDF to group them into meaningful and complete units such as paragraphs, section titles, list items, captions, figures or tables.", + "text": "The Docling pipeline feeds page images at 72 dpi resolution, which can be processed on a single CPU with sub-second latency. All predicted bounding-box proposals for document elements are post-processed to remove overlapping proposals based on confidence and size, and then intersected with the text tokens in the PDF to group them into meaningful and complete units such as paragraphs, section titles, list items, captions, figures or tables." + }, + { + "self_ref": "#/texts/38", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.68663787841797, + "t": 175.5574951171875, + "r": 228.1627197265625, + "b": 165.8931884765625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 27 + ] + } + ], + "orig": "Table Structure Recognition", + "text": "Table Structure Recognition" + }, + { + "self_ref": "#/texts/39", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.22769927978516, + "t": 156.10821533203125, + "r": 504.01800537109375, + "b": 69.84173583984375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 706 + ] + } + ], + "orig": "The TableFormer model [12], first published in 2022 and since refined with a custom structure token language [9], is a vision-transformer model for table structure recovery. It can predict the logical row and column structure of a given table based on an input image, and determine which table cells belong to column headers, row headers or the table body. Compared to earlier approaches, TableFormer handles many characteristics of tables, such as partial or no borderlines, empty cells, rows or columns, cell spans and hierarchy both on column-heading or row-heading level, tables with inconsistent indentation or alignment and other complexities. For inference, our implementation relies on PyTorch [2].", + "text": "The TableFormer model [12], first published in 2022 and since refined with a custom structure token language [9], is a vision-transformer model for table structure recovery. It can predict the logical row and column structure of a given table based on an input image, and determine which table cells belong to column headers, row headers or the table body. Compared to earlier approaches, TableFormer handles many characteristics of tables, such as partial or no borderlines, empty cells, rows or columns, cell spans and hierarchy both on column-heading or row-heading level, tables with inconsistent indentation or alignment and other complexities. For inference, our implementation relies on PyTorch [2]." + }, + { + "self_ref": "#/texts/40", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 302.7810974121094, + "t": 49.40008544921875, + "r": 308.4903259277344, + "b": 39.96010971069336, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ], + "orig": "3", + "text": "3" + }, + { + "self_ref": "#/texts/41", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.23402404785156, + "t": 717.677001953125, + "r": 504.0035095214844, + "b": 664.2490844726562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 459 + ] + } + ], + "orig": "The Docling pipeline feeds all table objects detected in the layout analysis to the TableFormer model, by providing an image-crop of the table and the included text cells. TableFormer structure predictions are matched back to the PDF cells in post-processing to avoid expensive re-transcription text in the table image. Typical tables require between 2 and 6 seconds to be processed on a standard CPU, strongly depending on the amount of included table cells.", + "text": "The Docling pipeline feeds all table objects detected in the layout analysis to the TableFormer model, by providing an image-crop of the table and the included text cells. TableFormer structure predictions are matched back to the PDF cells in post-processing to avoid expensive re-transcription text in the table image. Typical tables require between 2 and 6 seconds to be processed on a standard CPU, strongly depending on the amount of included table cells." + }, + { + "self_ref": "#/texts/42", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.63601684570312, + "t": 651.5885009765625, + "r": 130.29388427734375, + "b": 641.6778564453125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 3 + ] + } + ], + "orig": "OCR", + "text": "OCR" + }, + { + "self_ref": "#/texts/43", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.0999526977539, + "t": 632.9981689453125, + "r": 504.00347900390625, + "b": 568.0103759765625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 515 + ] + } + ], + "orig": "Docling provides optional support for OCR, for example to cover scanned PDFs or content in bitmaps images embedded on a page. In our initial release, we rely on EasyOCR [1], a popular thirdparty OCR library with support for many languages. Docling, by default, feeds a high-resolution page image (216 dpi) to the OCR engine, to allow capturing small print detail in decent quality. While EasyOCR delivers reasonable transcription quality, we observe that it runs fairly slow on CPU (upwards of 30 seconds per page).", + "text": "Docling provides optional support for OCR, for example to cover scanned PDFs or content in bitmaps images embedded on a page. In our initial release, we rely on EasyOCR [1], a popular thirdparty OCR library with support for many languages. Docling, by default, feeds a high-resolution page image (216 dpi) to the OCR engine, to allow capturing small print detail in decent quality. While EasyOCR delivers reasonable transcription quality, we observe that it runs fairly slow on CPU (upwards of 30 seconds per page)." + }, + { + "self_ref": "#/texts/44", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.48332214355469, + "t": 561.5487670898438, + "r": 504.0033874511719, + "b": 540.876953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 139 + ] + } + ], + "orig": "We are actively seeking collaboration from the open-source community to extend Docling with additional OCR backends and speed improvements.", + "text": "We are actively seeking collaboration from the open-source community to extend Docling with additional OCR backends and speed improvements." + }, + { + "self_ref": "#/texts/45", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.18000030517578, + "t": 527.0015869140625, + "r": 171.37210083007812, + "b": 516.7918701171875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 12 + ] + } + ], + "orig": "3.3 Assembly", + "text": "3.3 Assembly" + }, + { + "self_ref": "#/texts/46", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.259033203125, + "t": 506.85528564453125, + "r": 504.2517395019531, + "b": 431.21771240234375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 622 + ] + } + ], + "orig": "In the final pipeline stage, Docling assembles all prediction results produced on each page into a well-defined datatype that encapsulates a converted document, as defined in the auxiliary package docling-core . The generated document object is passed through a post-processing model which leverages several algorithms to augment features, such as detection of the document language, correcting the reading order, matching figures with captions and labelling metadata such as title, authors and references. The final output can then be serialized to JSON or transformed into a Markdown representation at the users request.", + "text": "In the final pipeline stage, Docling assembles all prediction results produced on each page into a well-defined datatype that encapsulates a converted document, as defined in the auxiliary package docling-core . The generated document object is passed through a post-processing model which leverages several algorithms to augment features, such as detection of the document language, correcting the reading order, matching figures with captions and labelling metadata such as title, authors and references. The final output can then be serialized to JSON or transformed into a Markdown representation at the users request." + }, + { + "self_ref": "#/texts/47", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.23114776611328, + "t": 417.6996154785156, + "r": 184.1142578125, + "b": 407.6521911621094, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 17 + ] + } + ], + "orig": "3.4 Extensibility", + "text": "3.4 Extensibility" + }, + { + "self_ref": "#/texts/48", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.01625061035156, + "t": 397.58544921875, + "r": 504.00347900390625, + "b": 311.05523681640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 753 + ] + } + ], + "orig": "Docling provides a straight-forward interface to extend its capabilities, namely the model pipeline. A model pipeline constitutes the central part in the processing, following initial document parsing and preceding output assembly, and can be fully customized by sub-classing from an abstract baseclass ( BaseModelPipeline ) or cloning the default model pipeline. This effectively allows to fully customize the chain of models, add or replace models, and introduce additional pipeline configuration parameters. To use a custom model pipeline, the custom pipeline class to instantiate can be provided as an argument to the main document conversion methods. We invite everyone in the community to propose additional or alternative models and improvements.", + "text": "Docling provides a straight-forward interface to extend its capabilities, namely the model pipeline. A model pipeline constitutes the central part in the processing, following initial document parsing and preceding output assembly, and can be fully customized by sub-classing from an abstract baseclass ( BaseModelPipeline ) or cloning the default model pipeline. This effectively allows to fully customize the chain of models, add or replace models, and introduce additional pipeline configuration parameters. To use a custom model pipeline, the custom pipeline class to instantiate can be provided as an argument to the main document conversion methods. We invite everyone in the community to propose additional or alternative models and improvements." + }, + { + "self_ref": "#/texts/49", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 106.94336700439453, + "t": 304.5326232910156, + "r": 504.0707092285156, + "b": 262.160400390625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 330 + ] + } + ], + "orig": "Implementations of model classes must satisfy the python Callable interface. The __call__ method must accept an iterator over page objects, and produce another iterator over the page objects which were augmented with the additional features predicted by the model, by extending the provided PagePredictions data model accordingly.", + "text": "Implementations of model classes must satisfy the python Callable interface. The __call__ method must accept an iterator over page objects, and produce another iterator over the page objects which were augmented with the additional features predicted by the model, by extending the provided PagePredictions data model accordingly." + }, + { + "self_ref": "#/texts/50", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.09239959716797, + "t": 245.8702392578125, + "r": 192.03822326660156, + "b": 234.0104217529297, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 13 + ] + } + ], + "orig": "4 Performance", + "text": "4 Performance" + }, + { + "self_ref": "#/texts/51", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.0430679321289, + "t": 221.5301513671875, + "r": 504.22869873046875, + "b": 135.16595458984375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 750 + ] + } + ], + "orig": "In this section, we establish some reference numbers for the processing speed of Docling and the resource budget it requires. All tests in this section are run with default options on our standard test set distributed with Docling, which consists of three papers from arXiv and two IBM Redbooks, with a total of 225 pages. Measurements were taken using both available PDF backends on two different hardware systems: one MacBook Pro M3 Max, and one bare-metal server running Ubuntu 20.04 LTS on an Intel Xeon E5-2690 CPU. For reproducibility, we fixed the thread budget (through setting OMP NUM THREADS environment variable ) once to 4 (Docling default) and once to 16 (equal to full core count on the test hardware). All results are shown in Table 1.", + "text": "In this section, we establish some reference numbers for the processing speed of Docling and the resource budget it requires. All tests in this section are run with default options on our standard test set distributed with Docling, which consists of three papers from arXiv and two IBM Redbooks, with a total of 225 pages. Measurements were taken using both available PDF backends on two different hardware systems: one MacBook Pro M3 Max, and one bare-metal server running Ubuntu 20.04 LTS on an Intel Xeon E5-2690 CPU. For reproducibility, we fixed the thread budget (through setting OMP NUM THREADS environment variable ) once to 4 (Docling default) and once to 16 (equal to full core count on the test hardware). All results are shown in Table 1." + }, + { + "self_ref": "#/texts/52", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.19568634033203, + "t": 128.8489990234375, + "r": 504.0033874511719, + "b": 96.76458740234375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 290 + ] + } + ], + "orig": "If you need to run Docling in very low-resource environments, please consider configuring the pypdfium backend. While it is faster and more memory efficient than the default docling-parse backend, it will come at the expense of worse quality results, especially in table structure recovery.", + "text": "If you need to run Docling in very low-resource environments, please consider configuring the pypdfium backend. While it is faster and more memory efficient than the default docling-parse backend, it will come at the expense of worse quality results, especially in table structure recovery." + }, + { + "self_ref": "#/texts/53", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.47733306884766, + "t": 90.18896484375, + "r": 504.123046875, + "b": 69.5284423828125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 192 + ] + } + ], + "orig": "Establishing GPU acceleration support for the AI models is currently work-in-progress and largely untested, but may work implicitly when CUDA is available and discovered by the onnxruntime and", + "text": "Establishing GPU acceleration support for the AI models is currently work-in-progress and largely untested, but may work implicitly when CUDA is available and discovered by the onnxruntime and" + }, + { + "self_ref": "#/texts/54", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 302.41058349609375, + "t": 49.65472412109375, + "r": 308.49029541015625, + "b": 39.960079193115234, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ], + "orig": "4", + "text": "4" + }, + { + "self_ref": "#/texts/55", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.42681121826172, + "t": 717.5958862304688, + "r": 504.0035400390625, + "b": 696.97607421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 121 + ] + } + ], + "orig": "torch runtimes backing the Docling pipeline. We will deliver updates on this topic at in a future version of this report.", + "text": "torch runtimes backing the Docling pipeline. We will deliver updates on this topic at in a future version of this report." + }, + { + "self_ref": "#/texts/56", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.0246810913086, + "t": 686.1126708984375, + "r": 504.30712890625, + "b": 643.7755126953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 383 + ] + } + ], + "orig": "Table 1: Runtime characteristics of Docling with the standard model pipeline and settings, on our test dataset of 225 pages, on two different systems. OCR is disabled. We show the time-to-solution (TTS), computed throughput in pages per second, and the peak memory used (resident set size) for both the Docling-native PDF backend and for the pypdfium backend, using 4 and 16 threads.", + "text": "Table 1: Runtime characteristics of Docling with the standard model pipeline and settings, on our test dataset of 225 pages, on two different systems. OCR is disabled. We show the time-to-solution (TTS), computed throughput in pages per second, and the peak memory used (resident set size) for both the Docling-native PDF backend and for the pypdfium backend, using 4 and 16 threads." + }, + { + "self_ref": "#/texts/57", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.46524047851562, + "t": 529.5911254882812, + "r": 190.20550537109375, + "b": 517.6605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 14 + ] + } + ], + "orig": "5 Applications", + "text": "5 Applications" + }, + { + "self_ref": "#/texts/58", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.10533142089844, + "t": 504.97296142578125, + "r": 504.0229187011719, + "b": 364.4931335449219, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1189 + ] + } + ], + "orig": "Thanks to the high-quality, richly structured document conversion achieved by Docling, its output qualifies for numerous downstream applications. For example, Docling can provide a base for detailed enterprise document search, passage retrieval or classification use-cases, or support knowledge extraction pipelines, allowing specific treatment of different structures in the document, such as tables, figures, section structure or references. For popular generative AI application patterns, such as retrieval-augmented generation (RAG), we provide quackling , an open-source package which capitalizes on Docling's feature-rich document output to enable document-native optimized vector embedding and chunking. It plugs in seamlessly with LLM frameworks such as LlamaIndex [8]. Since Docling is fast, stable and cheap to run, it also makes for an excellent choice to build document-derived datasets. With its powerful table structure recognition, it provides significant benefit to automated knowledge-base construction [11, 10]. Docling is also integrated within the open IBM data prep kit [6], which implements scalable data transforms to build large-scale multi-modal training datasets.", + "text": "Thanks to the high-quality, richly structured document conversion achieved by Docling, its output qualifies for numerous downstream applications. For example, Docling can provide a base for detailed enterprise document search, passage retrieval or classification use-cases, or support knowledge extraction pipelines, allowing specific treatment of different structures in the document, such as tables, figures, section structure or references. For popular generative AI application patterns, such as retrieval-augmented generation (RAG), we provide quackling , an open-source package which capitalizes on Docling's feature-rich document output to enable document-native optimized vector embedding and chunking. It plugs in seamlessly with LLM frameworks such as LlamaIndex [8]. Since Docling is fast, stable and cheap to run, it also makes for an excellent choice to build document-derived datasets. With its powerful table structure recognition, it provides significant benefit to automated knowledge-base construction [11, 10]. Docling is also integrated within the open IBM data prep kit [6], which implements scalable data transforms to build large-scale multi-modal training datasets." + }, + { + "self_ref": "#/texts/59", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.30191040039062, + "t": 347.71661376953125, + "r": 283.77734375, + "b": 336.0404357910156, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 31 + ] + } + ], + "orig": "6 Future work and contributions", + "text": "6 Future work and contributions" + }, + { + "self_ref": "#/texts/60", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 106.92281341552734, + "t": 323.5386657714844, + "r": 504.00347900390625, + "b": 258.76641845703125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 543 + ] + } + ], + "orig": "Docling is designed to allow easy extension of the model library and pipelines. In the future, we plan to extend Docling with several more models, such as a figure-classifier model, an equationrecognition model, a code-recognition model and more. This will help improve the quality of conversion for specific types of content, as well as augment extracted document metadata with additional information. Further investment into testing and optimizing GPU acceleration as well as improving the Docling-native PDF backend are on our roadmap, too.", + "text": "Docling is designed to allow easy extension of the model library and pipelines. In the future, we plan to extend Docling with several more models, such as a figure-classifier model, an equationrecognition model, a code-recognition model and more. This will help improve the quality of conversion for specific types of content, as well as augment extracted document metadata with additional information. Further investment into testing and optimizing GPU acceleration as well as improving the Docling-native PDF backend are on our roadmap, too." + }, + { + "self_ref": "#/texts/61", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.04397583007812, + "t": 252.4183349609375, + "r": 504.0430908203125, + "b": 198.77685546875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 402 + ] + } + ], + "orig": "We encourage everyone to propose or implement additional features and models, and will gladly take your inputs and contributions under review . The codebase of Docling is open for use and contribution, under the MIT license agreement and in alignment with our contributing guidelines included in the Docling repository. If you use Docling in your projects, please consider citing this technical report.", + "text": "We encourage everyone to propose or implement additional features and models, and will gladly take your inputs and contributions under review . The codebase of Docling is open for use and contribution, under the MIT license agreement and in alignment with our contributing guidelines included in the Docling repository. If you use Docling in your projects, please consider citing this technical report." + }, + { + "self_ref": "#/texts/62", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.45659637451172, + "t": 182.37445068359375, + "r": 163.79928588867188, + "b": 170.54043579101562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 10 + ] + } + ], + "orig": "References", + "text": "References" + }, + { + "self_ref": "#/texts/63", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 112.33451843261719, + "t": 163.731201171875, + "r": 504.0009460449219, + "b": 142.08197021484375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 127 + ] + } + ], + "orig": "[1] J. AI. Easyocr: Ready-to-use ocr with 80+ supported languages. https://github.com/ JaidedAI/EasyOCR , 2024. Version: 1.7.0.", + "text": "[1] J. AI. Easyocr: Ready-to-use ocr with 80+ supported languages. https://github.com/ JaidedAI/EasyOCR , 2024. Version: 1.7.0." + }, + { + "self_ref": "#/texts/64", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 112.45421600341797, + "t": 134.16204833984375, + "r": 504.0035095214844, + "b": 69.84818267822266, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 543 + ] + } + ], + "orig": "[2] J. Ansel, E. Yang, H. He, N. Gimelshein, A. Jain, M. Voznesensky, B. Bao, P. Bell, D. Berard, E. Burovski, G. Chauhan, A. Chourdia, W. Constable, A. Desmaison, Z. DeVito, E. Ellison, W. Feng, J. Gong, M. Gschwind, B. Hirsh, S. Huang, K. Kalambarkar, L. Kirsch, M. Lazos, M. Lezcano, Y. Liang, J. Liang, Y. Lu, C. Luk, B. Maher, Y. Pan, C. Puhrsch, M. Reso, M. Saroufim, M. Y. Siraichi, H. Suk, M. Suo, P. Tillet, E. Wang, X. Wang, W. Wen, S. Zhang, X. Zhao, K. Zhou, R. Zou, A. Mathews, G. Chanan, P. Wu, and S. Chintala. Pytorch 2: Faster", + "text": "[2] J. Ansel, E. Yang, H. He, N. Gimelshein, A. Jain, M. Voznesensky, B. Bao, P. Bell, D. Berard, E. Burovski, G. Chauhan, A. Chourdia, W. Constable, A. Desmaison, Z. DeVito, E. Ellison, W. Feng, J. Gong, M. Gschwind, B. Hirsh, S. Huang, K. Kalambarkar, L. Kirsch, M. Lazos, M. Lezcano, Y. Liang, J. Liang, Y. Lu, C. Luk, B. Maher, Y. Pan, C. Puhrsch, M. Reso, M. Saroufim, M. Y. Siraichi, H. Suk, M. Suo, P. Tillet, E. Wang, X. Wang, W. Wen, S. Zhang, X. Zhao, K. Zhou, R. Zou, A. Mathews, G. Chanan, P. Wu, and S. Chintala. Pytorch 2: Faster" + }, + { + "self_ref": "#/texts/65", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 302.7286376953125, + "t": 49.4200439453125, + "r": 308.49029541015625, + "b": 39.96018600463867, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ], + "orig": "5", + "text": "5" + }, + { + "self_ref": "#/texts/66", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 129.0050048828125, + "t": 717.4641723632812, + "r": 504.0033264160156, + "b": 674.812744140625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 331 + ] + } + ], + "orig": "machine learning through dynamic python bytecode transformation and graph compilation. In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2 (ASPLOS '24) . ACM, 4 2024. doi: 10.1145/3620665.3640366. URL https://pytorch.org/assets/pytorch2-2.pdf .", + "text": "machine learning through dynamic python bytecode transformation and graph compilation. In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2 (ASPLOS '24) . ACM, 4 2024. doi: 10.1145/3620665.3640366. URL https://pytorch.org/assets/pytorch2-2.pdf ." + }, + { + "self_ref": "#/texts/67", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.47968292236328, + "t": 665.970458984375, + "r": 504.3585510253906, + "b": 634.421630859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 255 + ] + } + ], + "orig": "[3] C. Auer, M. Dolfi, A. Carvalho, C. B. Ramis, and P. W. Staar. Delivering document conversion as a cloud service with high throughput and responsiveness. In 2022 IEEE 15th International Conference on Cloud Computing (CLOUD) , pages 363-373. IEEE, 2022.", + "text": "[3] C. Auer, M. Dolfi, A. Carvalho, C. B. Ramis, and P. W. Staar. Delivering document conversion as a cloud service with high throughput and responsiveness. In 2022 IEEE 15th International Conference on Cloud Computing (CLOUD) , pages 363-373. IEEE, 2022." + }, + { + "self_ref": "#/texts/68", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.59274291992188, + "t": 625.3558349609375, + "r": 504.00018310546875, + "b": 603.854736328125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 113 + ] + } + ], + "orig": "[4] J. Berkenbilt. Qpdf: A content-preserving pdf document transformer, 2024. URL https: //github.com/qpdf/qpdf .", + "text": "[4] J. Berkenbilt. Qpdf: A content-preserving pdf document transformer, 2024. URL https: //github.com/qpdf/qpdf ." + }, + { + "self_ref": "#/texts/69", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.65106964111328, + "t": 595.5201416015625, + "r": 478.88665771484375, + "b": 585.318359375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 84 + ] + } + ], + "orig": "[5] O. R. developers. Onnx runtime. https://onnxruntime.ai/ , 2024. Version: 1.18.1.", + "text": "[5] O. R. developers. Onnx runtime. https://onnxruntime.ai/ , 2024. Version: 1.18.1." + }, + { + "self_ref": "#/texts/70", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.5077896118164, + "t": 576.7722778320312, + "r": 504.0283508300781, + "b": 544.3335571289062, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 177 + ] + } + ], + "orig": "[6] IBM. Data Prep Kit: a community project to democratize and accelerate unstructured data preparation for LLM app developers, 2024. URL https://github.com/IBM/ data-prep-kit .", + "text": "[6] IBM. Data Prep Kit: a community project to democratize and accelerate unstructured data preparation for LLM app developers, 2024. URL https://github.com/IBM/ data-prep-kit ." + }, + { + "self_ref": "#/texts/71", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.71062469482422, + "t": 536.3712768554688, + "r": 447.4246826171875, + "b": 526.034423828125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 70 + ] + } + ], + "orig": "[7] A. S. Inc. PyMuPDF, 2024. URL https://github.com/pymupdf/PyMuPDF .", + "text": "[7] A. S. Inc. PyMuPDF, 2024. URL https://github.com/pymupdf/PyMuPDF ." + }, + { + "self_ref": "#/texts/72", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.72732543945312, + "t": 516.6817016601562, + "r": 483.91107177734375, + "b": 506.7769470214844, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 79 + ] + } + ], + "orig": "[8] J. Liu. LlamaIndex, 11 2022. URL https://github.com/jerryjliu/llama_index .", + "text": "[8] J. Liu. LlamaIndex, 11 2022. URL https://github.com/jerryjliu/llama_index ." + }, + { + "self_ref": "#/texts/73", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.50459289550781, + "t": 498.0171203613281, + "r": 504.004638671875, + "b": 444.5917053222656, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 439 + ] + } + ], + "orig": "[9] M. Lysak, A. Nassar, N. Livathinos, C. Auer, and P. Staar. Optimized Table Tokenization for Table Structure Recognition. In Document Analysis and Recognition - ICDAR 2023: 17th International Conference, San Jos'e, CA, USA, August 21-26, 2023, Proceedings, Part II , pages 37-50, Berlin, Heidelberg, Aug. 2023. Springer-Verlag. ISBN 978-3-031-41678-1. doi: 10. 1007/978-3-031-41679-8 3. URL https://doi.org/10.1007/978-3-031-41679-8_3 .", + "text": "[9] M. Lysak, A. Nassar, N. Livathinos, C. Auer, and P. Staar. Optimized Table Tokenization for Table Structure Recognition. In Document Analysis and Recognition - ICDAR 2023: 17th International Conference, San Jos'e, CA, USA, August 21-26, 2023, Proceedings, Part II , pages 37-50, Berlin, Heidelberg, Aug. 2023. Springer-Verlag. ISBN 978-3-031-41678-1. doi: 10. 1007/978-3-031-41679-8 3. URL https://doi.org/10.1007/978-3-031-41679-8_3 ." + }, + { + "self_ref": "#/texts/74", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.49420928955078, + "t": 435.72955322265625, + "r": 504.1082458496094, + "b": 359.86444091796875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 581 + ] + } + ], + "orig": "[10] L. Mishra, S. Dhibi, Y. Kim, C. Berrospi Ramis, S. Gupta, M. Dolfi, and P. Staar. Statements: Universal information extraction from tables with large language models for ESG KPIs. In D. Stammbach, J. Ni, T. Schimanski, K. Dutia, A. Singh, J. Bingler, C. Christiaen, N. Kushwaha, V. Muccione, S. A. Vaghefi, and M. Leippold, editors, Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024) , pages 193-214, Bangkok, Thailand, Aug. 2024. Association for Computational Linguistics. URL https://aclanthology.org/2024.climatenlp-1.15 .", + "text": "[10] L. Mishra, S. Dhibi, Y. Kim, C. Berrospi Ramis, S. Gupta, M. Dolfi, and P. Staar. Statements: Universal information extraction from tables with large language models for ESG KPIs. In D. Stammbach, J. Ni, T. Schimanski, K. Dutia, A. Singh, J. Bingler, C. Christiaen, N. Kushwaha, V. Muccione, S. A. Vaghefi, and M. Leippold, editors, Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024) , pages 193-214, Bangkok, Thailand, Aug. 2024. Association for Computational Linguistics. URL https://aclanthology.org/2024.climatenlp-1.15 ." + }, + { + "self_ref": "#/texts/75", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.34581756591797, + "t": 351.3507995605469, + "r": 504.6417541503906, + "b": 308.78851318359375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 288 + ] + } + ], + "orig": "[11] L. Morin, V. Weber, G. I. Meijer, F. Yu, and P. W. J. Staar. Patcid: an open-access dataset of chemical structures in patent documents. Nature Communications , 15(1):6532, August 2024. ISSN 2041-1723. doi: 10.1038/s41467-024-50779-y. URL https://doi.org/10.1038/ s41467-024-50779-y .", + "text": "[11] L. Morin, V. Weber, G. I. Meijer, F. Yu, and P. W. J. Staar. Patcid: an open-access dataset of chemical structures in patent documents. Nature Communications , 15(1):6532, August 2024. ISSN 2041-1723. doi: 10.1038/s41467-024-50779-y. URL https://doi.org/10.1038/ s41467-024-50779-y ." + }, + { + "self_ref": "#/texts/76", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.38827514648438, + "t": 299.4344177246094, + "r": 504.3544616699219, + "b": 268.1841125488281, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 226 + ] + } + ], + "orig": "[12] A. Nassar, N. Livathinos, M. Lysak, and P. Staar. Tableformer: Table structure understanding with transformers. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition , pages 4614-4623, 2022.", + "text": "[12] A. Nassar, N. Livathinos, M. Lysak, and P. Staar. Tableformer: Table structure understanding with transformers. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition , pages 4614-4623, 2022." + }, + { + "self_ref": "#/texts/77", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.36676788330078, + "t": 258.790283203125, + "r": 504.00341796875, + "b": 238.3961181640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 164 + ] + } + ], + "orig": "[13] B. Pfitzmann, C. Auer, M. Dolfi, A. S. Nassar, and P. Staar. Doclaynet: a large humanannotated dataset for document-layout segmentation. pages 3743-3751, 2022.", + "text": "[13] B. Pfitzmann, C. Auer, M. Dolfi, A. S. Nassar, and P. Staar. Doclaynet: a large humanannotated dataset for document-layout segmentation. pages 3743-3751, 2022." + }, + { + "self_ref": "#/texts/78", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.28363800048828, + "t": 229.4072265625, + "r": 504.00091552734375, + "b": 207.166748046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 102 + ] + } + ], + "orig": "[14] pypdf Maintainers. pypdf: A Pure-Python PDF Library, 2024. URL https://github.com/ py-pdf/pypdf .", + "text": "[14] pypdf Maintainers. pypdf: A Pure-Python PDF Library, 2024. URL https://github.com/ py-pdf/pypdf ." + }, + { + "self_ref": "#/texts/79", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.2214584350586, + "t": 199.6893310546875, + "r": 504.0008850097656, + "b": 177.491455078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 109 + ] + } + ], + "orig": "[15] P. Team. PyPDFium2: Python bindings for PDFium, 2024. URL https://github.com/ pypdfium2-team/pypdfium2 .", + "text": "[15] P. Team. PyPDFium2: Python bindings for PDFium, 2024. URL https://github.com/ pypdfium2-team/pypdfium2 ." + }, + { + "self_ref": "#/texts/80", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.28424072265625, + "t": 169.70806884765625, + "r": 504.0033264160156, + "b": 148.91436767578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 128 + ] + } + ], + "orig": "[16] Y. Zhao, W. Lv, S. Xu, J. Wei, G. Wang, Q. Dang, Y. Liu, and J. Chen. Detrs beat yolos on real-time object detection, 2023.", + "text": "[16] Y. Zhao, W. Lv, S. Xu, J. Wei, G. Wang, Q. Dang, Y. Liu, and J. Chen. Detrs beat yolos on real-time object detection, 2023." + }, + { + "self_ref": "#/texts/81", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 302.7389221191406, + "t": 49.36236572265625, + "r": 308.5960998535156, + "b": 39.96012496948242, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ], + "orig": "6", + "text": "6" + }, + { + "self_ref": "#/texts/82", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 107.5181884765625, + "t": 718.9773559570312, + "r": 157.5303955078125, + "b": 706.9950561523438, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 8 + ] + } + ], + "orig": "Appendix", + "text": "Appendix" + }, + { + "self_ref": "#/texts/83", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 107.6931381225586, + "t": 694.013671875, + "r": 463.7545471191406, + "b": 684.3182373046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 88 + ] + } + ], + "orig": "In this section, we illustrate a few examples of Docling' s output in Markdown and JSON.", + "text": "In this section, we illustrate a few examples of Docling' s output in Markdown and JSON." + }, + { + "self_ref": "#/texts/84", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 143.85626220703125, + "t": 669.6826171875, + "r": 292.0960998535156, + "b": 654.0538330078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 71 + ] + } + ], + "orig": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", + "text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + }, + { + "self_ref": "#/texts/85", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 138.0285186767578, + "t": 650.9168701171875, + "r": 176.45944213867188, + "b": 631.6739501953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 73 + ] + } + ], + "orig": "Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com", + "text": "Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com" + }, + { + "self_ref": "#/texts/86", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 199.22952270507812, + "t": 650.9168701171875, + "r": 237.34890747070312, + "b": 631.6729125976562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 71 + ] + } + ], + "orig": "Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com", + "text": "Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com" + }, + { + "self_ref": "#/texts/87", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 260.11895751953125, + "t": 650.9168701171875, + "r": 298.3296203613281, + "b": 631.549072265625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 70 + ] + } + ], + "orig": "Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com", + "text": "Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com" + }, + { + "self_ref": "#/texts/88", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 168.40359497070312, + "t": 629.259521484375, + "r": 206.98048400878906, + "b": 609.97509765625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 72 + ] + } + ], + "orig": "Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com", + "text": "Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com" + }, + { + "self_ref": "#/texts/89", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 229.48968505859375, + "t": 629.259521484375, + "r": 267.6090393066406, + "b": 610.0166015625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 68 + ] + } + ], + "orig": "Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com", + "text": "Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com" + }, + { + "self_ref": "#/texts/90", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 123.37833404541016, + "t": 607.9520263671875, + "r": 146.12112426757812, + "b": 602.521484375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 8 + ] + } + ], + "orig": "ABSTRACT", + "text": "ABSTRACT" + }, + { + "self_ref": "#/texts/91", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 123.66893768310547, + "t": 602.5093994140625, + "r": 214.2318878173828, + "b": 500.3504333496094, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1599 + ] + } + ], + "orig": "Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present $_{DocLayNet}$, a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.", + "text": "Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present $_{DocLayNet}$, a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis." + }, + { + "self_ref": "#/texts/92", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 124.08540344238281, + "t": 495.44818115234375, + "r": 155.0667724609375, + "b": 490.0176086425781, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 12 + ] + } + ], + "orig": "CCS CONCEPTS", + "text": "CCS CONCEPTS" + }, + { + "self_ref": "#/texts/93", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 124.05064392089844, + "t": 490.005126953125, + "r": 215.08236694335938, + "b": 476.94268798828125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 174 + ] + } + ], + "orig": "\u00b7 Information systems \u2192 Document structure ; \u00b7 Applied computing \u2192 Document analysis ; \u00b7 Computing methodologies \u2192 Machine learning ; Computer vision ; $_{Object detection}$;", + "text": "\u00b7 Information systems \u2192 Document structure ; \u00b7 Applied computing \u2192 Document analysis ; \u00b7 Computing methodologies \u2192 Machine learning ; Computer vision ; $_{Object detection}$;" + }, + { + "self_ref": "#/texts/94", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 123.8716049194336, + "t": 464.7064514160156, + "r": 214.06785583496094, + "b": 436.57623291015625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 566 + ] + } + ], + "orig": "Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profi t or commercial advantage and that copies bear this notice and the full citation on thefirst page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s). KDD '22, August 14-18, 2022, Washington, DC, USA \u00a9 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08. https://doi.org/10.1145/3534678.3539043", + "text": "Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profi t or commercial advantage and that copies bear this notice and the full citation on thefirst page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s). KDD '22, August 14-18, 2022, Washington, DC, USA \u00a9 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08. https://doi.org/10.1145/3534678.3539043" + }, + { + "self_ref": "#/texts/95", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.12261962890625, + "t": 668.5272216796875, + "r": 521.3091430664062, + "b": 662.5027465820312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 71 + ] + } + ], + "orig": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis", + "text": "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + }, + { + "self_ref": "#/texts/96", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.2007141113281, + "t": 657.4287109375, + "r": 433.130126953125, + "b": 653.031005859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 73 + ] + } + ], + "orig": "Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com", + "text": "Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com" + }, + { + "self_ref": "#/texts/97", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.6015930175781, + "t": 648.9207153320312, + "r": 432.7991943359375, + "b": 645.91748046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 71 + ] + } + ], + "orig": "Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com", + "text": "Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com" + }, + { + "self_ref": "#/texts/98", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.18927001953125, + "t": 641.90869140625, + "r": 429.5950012207031, + "b": 637.8482666015625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 70 + ] + } + ], + "orig": "Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com", + "text": "Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com" + }, + { + "self_ref": "#/texts/99", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.2640075683594, + "t": 633.8328857421875, + "r": 436.4726867675781, + "b": 629.6668090820312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 72 + ] + } + ], + "orig": "Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com", + "text": "Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com" + }, + { + "self_ref": "#/texts/100", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.2624206542969, + "t": 625.7568359375, + "r": 427.5014953613281, + "b": 621.548583984375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 68 + ] + } + ], + "orig": "Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com", + "text": "Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com" + }, + { + "self_ref": "#/texts/101", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.2585754394531, + "t": 615.97607421875, + "r": 357.9208984375, + "b": 610.2438354492188, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 8 + ] + } + ], + "orig": "ABSTRACT", + "text": "ABSTRACT" + }, + { + "self_ref": "#/texts/102", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 328.861083984375, + "t": 604.5524291992188, + "r": 528.3615112304688, + "b": 549.0685424804688, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1594 + ] + } + ], + "orig": "Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large groundtruth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.", + "text": "Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large groundtruth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis." + }, + { + "self_ref": "#/texts/103", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.3970947265625, + "t": 543.3802490234375, + "r": 370.7042541503906, + "b": 537.7380981445312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 12 + ] + } + ], + "orig": "CCS CONCEPTS", + "text": "CCS CONCEPTS" + }, + { + "self_ref": "#/texts/104", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.4852600097656, + "t": 532.8919067382812, + "r": 516.2509155273438, + "b": 523.6624755859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 191 + ] + } + ], + "orig": "$_{\u00b7 Information systems }$\u2192$_{ Document structure ; \u00b7 Applied computing }$ \u2192$_{ Document analysis ; \u00b7 Computing methodologies }$\u2192$_{ Machine learning ;}$ Computer vision ; Object detection ;", + "text": "$_{\u00b7 Information systems }$\u2192$_{ Document structure ; \u00b7 Applied computing }$ \u2192$_{ Document analysis ; \u00b7 Computing methodologies }$\u2192$_{ Machine learning ;}$ Computer vision ; Object detection ;" + }, + { + "self_ref": "#/texts/105", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.1643371582031, + "t": 519.994873046875, + "r": 527.3062133789062, + "b": 506.2882080078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 397 + ] + } + ], + "orig": "Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).", + "text": "Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s)." + }, + { + "self_ref": "#/texts/106", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.1140441894531, + "t": 502.5775146484375, + "r": 513.2442016601562, + "b": 493.3287353515625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 168 + ] + } + ], + "orig": "KDD '22, August 14-18, 2022, Washington, DC, USA \u00a9 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08. https://doi.org/10.1145/3534678.3539043", + "text": "KDD '22, August 14-18, 2022, Washington, DC, USA \u00a9 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08. https://doi.org/10.1145/3534678.3539043" + }, + { + "self_ref": "#/texts/107", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.0572509765625, + "t": 490.3890686035156, + "r": 445.8473205566406, + "b": 486.1141662597656, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 84 + ] + } + ], + "orig": "Figure 1: Four examples of complex page layouts across different document categories", + "text": "Figure 1: Four examples of complex page layouts across different document categories" + }, + { + "self_ref": "#/texts/108", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.223876953125, + "t": 479.91156005859375, + "r": 359.9208984375, + "b": 474.4564514160156, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 8 + ] + } + ], + "orig": "KEYWORDS", + "text": "KEYWORDS" + }, + { + "self_ref": "#/texts/109", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.092529296875, + "t": 469.5487365722656, + "r": 454.5943603515625, + "b": 465.4438781738281, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 90 + ] + } + ], + "orig": "PDF document conversion, layout segmentation, object-detection, data set, Machine Learning", + "text": "PDF document conversion, layout segmentation, object-detection, data set, Machine Learning" + }, + { + "self_ref": "#/texts/110", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.1321716308594, + "t": 459.6901550292969, + "r": 388.247802734375, + "b": 453.86309814453125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 21 + ] + } + ], + "orig": "ACM Reference Format:", + "text": "ACM Reference Format:" + }, + { + "self_ref": "#/texts/111", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 328.9222412109375, + "t": 448.7705383300781, + "r": 528.159423828125, + "b": 435.41400146484375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 374 + ] + } + ], + "orig": "Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Washington, DC, USA. ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/ 3534678.3539043", + "text": "Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Washington, DC, USA. ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/ 3534678.3539043" + }, + { + "self_ref": "#/texts/112", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 221.84927368164062, + "t": 499.2803955078125, + "r": 312.25115966796875, + "b": 490.75177001953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 84 + ] + } + ], + "orig": "Figure 1: Four examples of complex page layouts across different document categories", + "text": "Figure 1: Four examples of complex page layouts across different document categories" + }, + { + "self_ref": "#/texts/113", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 221.95114135742188, + "t": 480.065673828125, + "r": 245.78892517089844, + "b": 474.63507080078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 8 + ] + } + ], + "orig": "KEYWORDS", + "text": "KEYWORDS" + }, + { + "self_ref": "#/texts/114", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 222.00753784179688, + "t": 474.62298583984375, + "r": 312.0212097167969, + "b": 465.4729919433594, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 90 + ] + } + ], + "orig": "PDF document conversion, layout segmentation, object-detection, data set, Machine Learning", + "text": "PDF document conversion, layout segmentation, object-detection, data set, Machine Learning" + }, + { + "self_ref": "#/texts/115", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 222.01475524902344, + "t": 462.0861511230469, + "r": 254.69903564453125, + "b": 458.1186218261719, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 21 + ] + } + ], + "orig": "ACM Reference Format:", + "text": "ACM Reference Format:" + }, + { + "self_ref": "#/texts/116", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 221.68344116210938, + "t": 458.718994140625, + "r": 312.1560974121094, + "b": 436.15557861328125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 380 + ] + } + ], + "orig": "Birgit Pfitzmann, Christoph Auer, Michele Dolfi , Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Wash-$_{ington, DC, USA.}$ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/ 3534678.3539043", + "text": "Birgit Pfitzmann, Christoph Auer, Michele Dolfi , Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Wash-$_{ington, DC, USA.}$ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/ 3534678.3539043" + }, + { + "self_ref": "#/texts/117", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.6015930175781, + "t": 428.9794921875, + "r": 373.37646484375, + "b": 423.8311462402344, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 14 + ] + } + ], + "orig": "1 INTRODUCTION", + "text": "1 INTRODUCTION" + }, + { + "self_ref": "#/texts/118", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 107.31889343261719, + "t": 420.2637939453125, + "r": 527.5916137695312, + "b": 377.62860107421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1027 + ] + } + ], + "orig": "Despite the substantial improvements achieved with machine-learning (ML) approaches and deep neural networks in recent years, document conversion remains a challenging problem, as demonstrated by the numerous public competitions held on this topic [1-4]. The challenge originates from the huge variability in PDF documents regarding layout, language and formats (scanned, programmatic or a combination of both). Engineering a single ML model that can be applied on all types of documents and provides high-quality layout segmentation remains to this day extremely challenging [5]. To highlight the variability in document layouts, we show a few example documents from the DocLayNet dataset in Figure 1. Figure 2: Title page of the DocLayNet paper (arxiv .org/pdf/2206.01062) - left PDF, right rendered Markdown. If recognized, metadata such as authors are appearing first under the title. Text content inside figures is currently dropped, the caption is retained and linked to the figure in the JSON representation (not shown).", + "text": "Despite the substantial improvements achieved with machine-learning (ML) approaches and deep neural networks in recent years, document conversion remains a challenging problem, as demonstrated by the numerous public competitions held on this topic [1-4]. The challenge originates from the huge variability in PDF documents regarding layout, language and formats (scanned, programmatic or a combination of both). Engineering a single ML model that can be applied on all types of documents and provides high-quality layout segmentation remains to this day extremely challenging [5]. To highlight the variability in document layouts, we show a few example documents from the DocLayNet dataset in Figure 1. Figure 2: Title page of the DocLayNet paper (arxiv .org/pdf/2206.01062) - left PDF, right rendered Markdown. If recognized, metadata such as authors are appearing first under the title. Text content inside figures is currently dropped, the caption is retained and linked to the figure in the JSON representation (not shown)." + }, + { + "self_ref": "#/texts/119", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 302.8258056640625, + "t": 49.2652587890625, + "r": 308.49029541015625, + "b": 39.960079193115234, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ], + "orig": "7", + "text": "7" + }, + { + "self_ref": "#/texts/120", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 110.2352066040039, + "t": 618.2011108398438, + "r": 118.32157135009766, + "b": 492.749267578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 37 + ] + } + ], + "orig": "arXiv:2206.01062v1 [cs.CV] 2 Jun 2022", + "text": "arXiv:2206.01062v1 [cs.CV] 2 Jun 2022" + }, + { + "self_ref": "#/texts/121", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 122.18534851074219, + "t": 563.207763671875, + "r": 338.8071594238281, + "b": 558.6549682617188, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 130 + ] + } + ], + "orig": "KDD '22, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", + "text": "KDD '22, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar" + }, + { + "self_ref": "#/texts/122", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 122.11329650878906, + "t": 552.1026611328125, + "r": 226.37594604492188, + "b": 509.48504638671875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 489 + ] + } + ], + "orig": "Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset.", + "text": "Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset." + }, + { + "self_ref": "#/texts/123", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 121.85212707519531, + "t": 431.1610107421875, + "r": 226.33633422851562, + "b": 341.54669189453125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1252 + ] + } + ], + "orig": "to avoid this at any cost in order to have clear, unbiased baseline numbers for human document-layout annotation. Third, we introduced the feature of snapping boxes around text segments to obtain a pixel-accurate annotation and again reduce time and effort. The CCS annotation tool automatically shrinks every user-drawn box to the minimum bounding-box around the enclosed text-cells for all purely text-based segments, which excludes only Table and $_{Picture}$. For the latter, we instructed annotation staffto minimise inclusion of surrounding whitespace while including all graphical lines. A downside of snapping boxes to enclosed text cells is that some wrongly parsed PDF pages cannot be annotated correctly and need to be skipped. Fourth, we established a way toflag pages as rejected for cases where no valid annotation according to the label guidelines could be achieved. Example cases for this would be PDF pages that render incorrectly or contain layouts that are impossible to capture with non-overlapping rectangles. Such rejected pages are not contained in thefinal dataset. With all these measures in place, experienced annotation staffmanaged to annotate a single page in a typical timeframe of 20s to 60s, depending on its complexity.", + "text": "to avoid this at any cost in order to have clear, unbiased baseline numbers for human document-layout annotation. Third, we introduced the feature of snapping boxes around text segments to obtain a pixel-accurate annotation and again reduce time and effort. The CCS annotation tool automatically shrinks every user-drawn box to the minimum bounding-box around the enclosed text-cells for all purely text-based segments, which excludes only Table and $_{Picture}$. For the latter, we instructed annotation staffto minimise inclusion of surrounding whitespace while including all graphical lines. A downside of snapping boxes to enclosed text cells is that some wrongly parsed PDF pages cannot be annotated correctly and need to be skipped. Fourth, we established a way toflag pages as rejected for cases where no valid annotation according to the label guidelines could be achieved. Example cases for this would be PDF pages that render incorrectly or contain layouts that are impossible to capture with non-overlapping rectangles. Such rejected pages are not contained in thefinal dataset. With all these measures in place, experienced annotation staffmanaged to annotate a single page in a typical timeframe of 20s to 60s, depending on its complexity." + }, + { + "self_ref": "#/texts/124", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 122.35355377197266, + "t": 338.0934753417969, + "r": 163.32470703125, + "b": 331.835693359375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 13 + ] + } + ], + "orig": "5 EXPERIMENTS", + "text": "5 EXPERIMENTS" + }, + { + "self_ref": "#/texts/125", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 122.00563049316406, + "t": 327.5806884765625, + "r": 226.2816162109375, + "b": 284.8097229003906, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 584 + ] + } + ], + "orig": "The primary goal of DocLayNet is to obtain high-quality ML models capable of accurate document-layout analysis on a wide variety of challenging layouts. As discussed in Section 2, object detection models are currently the easiest to use, due to the standardisation of ground-truth data in COCO format [16] and the availability of general frameworks such as detectron2 [17]. Furthermore, baseline numbers in PubLayNet and DocBank were obtained using standard object detection models such as Mask R-CNN and Faster R-CNN. As such, we will relate to these object detection methods in this", + "text": "The primary goal of DocLayNet is to obtain high-quality ML models capable of accurate document-layout analysis on a wide variety of challenging layouts. As discussed in Section 2, object detection models are currently the easiest to use, due to the standardisation of ground-truth data in COCO format [16] and the availability of general frameworks such as detectron2 [17]. Furthermore, baseline numbers in PubLayNet and DocBank were obtained using standard object detection models such as Mask R-CNN and Faster R-CNN. As such, we will relate to these object detection methods in this" + }, + { + "self_ref": "#/texts/126", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 107.26910400390625, + "t": 267.0020751953125, + "r": 504.2988586425781, + "b": 224.93768310546875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 393 + ] + } + ], + "orig": "Figure 3: Page 6 of the DocLayNet paper. If recognized, metadata such as authors are appearing first under the title. Elements recognized as page headers or footers are suppressed in Markdown to deliver uninterrupted content in reading order. Tables are inserted in reading order. The paragraph in \"5. Experiments\" wrapping over the column end is broken up in two and interrupted by the table.", + "text": "Figure 3: Page 6 of the DocLayNet paper. If recognized, metadata such as authors are appearing first under the title. Elements recognized as page headers or footers are suppressed in Markdown to deliver uninterrupted content in reading order. Tables are inserted in reading order. The paragraph in \"5. Experiments\" wrapping over the column end is broken up in two and interrupted by the table." + }, + { + "self_ref": "#/texts/127", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 235.38954162597656, + "t": 469.9726867675781, + "r": 339.28778076171875, + "b": 441.4075927734375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 329 + ] + } + ], + "orig": "Figure 5: Prediction performance (mAP@0.5-0.95) of a Mask R-CNN network with ResNet50 backbone trained on increasing fractions of the DocLayNet dataset. The learning curv eflattens around the 80% mark, indicating that increasing the size of the DocLayNet dataset with similar data will not yield significantly better predictions.", + "text": "Figure 5: Prediction performance (mAP@0.5-0.95) of a Mask R-CNN network with ResNet50 backbone trained on increasing fractions of the DocLayNet dataset. The learning curv eflattens around the 80% mark, indicating that increasing the size of the DocLayNet dataset with similar data will not yield significantly better predictions." + }, + { + "self_ref": "#/texts/128", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 234.98081970214844, + "t": 425.5683898925781, + "r": 338.644775390625, + "b": 415.5873718261719, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 102 + ] + } + ], + "orig": "paper and leave the detailed evaluation of more recent methods mentioned in Section 2 for future work.", + "text": "paper and leave the detailed evaluation of more recent methods mentioned in Section 2 for future work." + }, + { + "self_ref": "#/texts/129", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 234.98487854003906, + "t": 416.19970703125, + "r": 338.76287841796875, + "b": 382.79742431640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 397 + ] + } + ], + "orig": "In this section, we will present several aspects related to the performance of object detection models on DocLayNet. Similarly as in PubLayNet, we will evaluate the quality of their predictions using mean average precision (mAP) with 10 overlaps that range from 0.5 to 0.95 in steps of 0.05 (mAP@0.5-0.95). These scores are computed by leveraging the evaluation code provided by the COCO API [16].", + "text": "In this section, we will present several aspects related to the performance of object detection models on DocLayNet. Similarly as in PubLayNet, we will evaluate the quality of their predictions using mean average precision (mAP) with 10 overlaps that range from 0.5 to 0.95 in steps of 0.05 (mAP@0.5-0.95). These scores are computed by leveraging the evaluation code provided by the COCO API [16]." + }, + { + "self_ref": "#/texts/130", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 235.3143310546875, + "t": 377.12237548828125, + "r": 299.73687744140625, + "b": 370.8646240234375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 30 + ] + } + ], + "orig": "Baselines for Object Detection", + "text": "Baselines for Object Detection" + }, + { + "self_ref": "#/texts/131", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 235.06893920898438, + "t": 370.8502197265625, + "r": 338.89947509765625, + "b": 285.920654296875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1144 + ] + } + ], + "orig": "In Table 2, we present baseline experiments (given in mAP) on Mask R-CNN [12], Faster R-CNN [11], and YOLOv5 [13]. Both training and evaluation were performed on RGB images with dimensions of $^{1025}$\u00d71025 pixels. For training, we only used one annotation in case of redundantly annotated pages. As one can observe, the variation in mAP between the models is rather low, but overall between 6 and 10% lower than the mAP computed from the pairwise human annotations on triple-annotated pages. This gives a good indication that the DocLayNet dataset poses a worthwhile challenge for the research community to close the gap between human recognition and ML approaches. It is interesting to see that Mask R-CNN and Faster R-CNN produce very comparable mAP scores, indicating that pixel-based image segmentation derived from bounding-boxes does not help to obtain better predictions. On the other hand, the more recent Yolov5x model does very well and even out-performs humans on selected labels such as $_{Text}$, Table and $_{Picture}$. This is not entirely surprising, as and Picture are abundant and the most visually distinctive in a document.", + "text": "In Table 2, we present baseline experiments (given in mAP) on Mask R-CNN [12], Faster R-CNN [11], and YOLOv5 [13]. Both training and evaluation were performed on RGB images with dimensions of $^{1025}$\u00d71025 pixels. For training, we only used one annotation in case of redundantly annotated pages. As one can observe, the variation in mAP between the models is rather low, but overall between 6 and 10% lower than the mAP computed from the pairwise human annotations on triple-annotated pages. This gives a good indication that the DocLayNet dataset poses a worthwhile challenge for the research community to close the gap between human recognition and ML approaches. It is interesting to see that Mask R-CNN and Faster R-CNN produce very comparable mAP scores, indicating that pixel-based image segmentation derived from bounding-boxes does not help to obtain better predictions. On the other hand, the more recent Yolov5x model does very well and even out-performs humans on selected labels such as $_{Text}$, Table and $_{Picture}$. This is not entirely surprising, as and Picture are abundant and the most visually distinctive in a document." + }, + { + "self_ref": "#/texts/132", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.3333435058594, + "t": 563.0970458984375, + "r": 527.1106567382812, + "b": 547.0772705078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 419 + ] + } + ], + "orig": "Prediclion Derormance (up80.5-0.85 ohobeci detecion lalo ks Doclaynal Lest saL Ine VACNN (Mask R-CNNI and FACNN (Faster A-CNM) modcs mith PosNc: 50 PosNo: 101 backtone woro trainod based on Enc nchwwcrk achrocturos tom Ihc Oeronhroase a-CNn aso rioi-Fpn Jx, FasieA-Cnn a1o1-FPN Jx), wilh delaui conlwuralions The YoUg mpomorcabon utilzod w2s YoloSyb(13| modos woro inbalsod usino cro-trunodmonhts hron Coco 2017 datasor", + "text": "Prediclion Derormance (up80.5-0.85 ohobeci detecion lalo ks Doclaynal Lest saL Ine VACNN (Mask R-CNNI and FACNN (Faster A-CNM) modcs mith PosNc: 50 PosNo: 101 backtone woro trainod based on Enc nchwwcrk achrocturos tom Ihc Oeronhroase a-CNn aso rioi-Fpn Jx, FasieA-Cnn a1o1-FPN Jx), wilh delaui conlwuralions The YoUg mpomorcabon utilzod w2s YoloSyb(13| modos woro inbalsod usino cro-trunodmonhts hron Coco 2017 datasor" + }, + { + "self_ref": "#/texts/133", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 365.95367431640625, + "t": 447.0, + "r": 530.2679443359375, + "b": 405.3583984375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 934 + ] + } + ], + "orig": "iD avod Ihbs arcost cha unbasndbasolino numoc human cocumnnt-Laycut annotalion; Thrd Inirooucod leatura 0i snapoina Doxes around lerl scainunis cblan & pixel-accuiale annolaton and aJan feduce Bifre and elonThe CCS annoinbon aloMalca shruks Ovory Usor-drawnboro mnmum boundino-borarounaIho onclosod coxt-colls Purolytort basud scoitontwhich uxclldcs Ort Tatlo and Picluo latsor Inssucicdannjlabon sha mnim so inclusion Suitcurding mlospeco whloIncvon Oenoncang doans d0 oisnaocmnbors Onchse Ihal So10 wioogly Daisoc Pogcs Cannol be annotalcd coTcCEY and nccd supocd Foudn Oshdned Wuyio(aq Dagcs (ccclod Cases whcion valid anncuabon eccofding abeiqu Oelines coukbe acheneu Eamnole Case, flis wouk PDF peoe3 Ihal rendernnccrrecUy contanlavuta hat Imnosshk cantra milh Vananonnyogannio{ Suchiceciodoaoos not coralnon Ihofnn hr Aroknacoarreehetyn annollca slall nluuocd unnoln sina \" Puou lypical Lmnetamre 0l 20s 10 605 cecendnc conoanty", + "text": "iD avod Ihbs arcost cha unbasndbasolino numoc human cocumnnt-Laycut annotalion; Thrd Inirooucod leatura 0i snapoina Doxes around lerl scainunis cblan & pixel-accuiale annolaton and aJan feduce Bifre and elonThe CCS annoinbon aloMalca shruks Ovory Usor-drawnboro mnmum boundino-borarounaIho onclosod coxt-colls Purolytort basud scoitontwhich uxclldcs Ort Tatlo and Picluo latsor Inssucicdannjlabon sha mnim so inclusion Suitcurding mlospeco whloIncvon Oenoncang doans d0 oisnaocmnbors Onchse Ihal So10 wioogly Daisoc Pogcs Cannol be annotalcd coTcCEY and nccd supocd Foudn Oshdned Wuyio(aq Dagcs (ccclod Cases whcion valid anncuabon eccofding abeiqu Oelines coukbe acheneu Eamnole Case, flis wouk PDF peoe3 Ihal rendernnccrrecUy contanlavuta hat Imnosshk cantra milh Vananonnyogannio{ Suchiceciodoaoos not coralnon Ihofnn hr Aroknacoarreehetyn annollca slall nluuocd unnoln sina \" Puou lypical Lmnetamre 0l 20s 10 605 cecendnc conoanty" + }, + { + "self_ref": "#/texts/134", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.17059326171875, + "t": 400.3333435058594, + "r": 404.3333435058594, + "b": 395.0, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 13 + ] + } + ], + "orig": "5 EXPERIMENTS", + "text": "5 EXPERIMENTS" + }, + { + "self_ref": "#/texts/135", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.0, + "t": 391.0, + "r": 529.8655395507812, + "b": 370.37261962890625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 418 + ] + } + ], + "orig": "Ine crimary goal OocVAYNo cblan hion-quality Modols AccuaiodoaMoiuvana4s WMeVanalon chalcnonglayoul: Cecurdg echon Doicdi Delccion modcb rtene Casistlo Usc, Quulo Hhndandiubon ground-vuth data COCO lornat [16] and avaladloy enetal Irarnenoiks uch derectrcnz7] Furnemmcre, baseline nmnoe < I Putun Notand DocBank calanodusnsundad coict dosnchonmodols such Mas< A CNN and Fasior A CNN SuEna blraomhdelecfa nonInr Canacle", + "text": "Ine crimary goal OocVAYNo cblan hion-quality Modols AccuaiodoaMoiuvana4s WMeVanalon chalcnonglayoul: Cecurdg echon Doicdi Delccion modcb rtene Casistlo Usc, Quulo Hhndandiubon ground-vuth data COCO lornat [16] and avaladloy enetal Irarnenoiks uch derectrcnz7] Furnemmcre, baseline nmnoe < I Putun Notand DocBank calanodusnsundad coict dosnchonmodols such Mas< A CNN and Fasior A CNN SuEna blraomhdelecfa nonInr Canacle" + }, + { + "self_ref": "#/texts/136", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 365.9671936035156, + "t": 367.0, + "r": 528.6666870117188, + "b": 354.9878845214844, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 268 + ] + } + ], + "orig": "Fauri Prco chon ocrloianC( 005-095) ola Mask A-CNN ncthoik ilh AcsNciSo backbono brainod on incrcasing Iracbons oi DocLaynei calasot Tne loannp auro altons around Ih0 \u20ac03 noicahino Ihal inxreasing /e 520 Q Iho DocL\u00f8y Nel dalasot Amardaen nol Ycid sn: dorOocC Chons LAD", + "text": "Fauri Prco chon ocrloianC( 005-095) ola Mask A-CNN ncthoik ilh AcsNciSo backbono brainod on incrcasing Iracbons oi DocLaynei calasot Tne loannp auro altons around Ih0 \u20ac03 noicahino Ihal inxreasing /e 520 Q Iho DocL\u00f8y Nel dalasot Amardaen nol Ycid sn: dorOocC Chons LAD" + }, + { + "self_ref": "#/texts/137", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 365.8995056152344, + "t": 351.3333435058594, + "r": 489.40869140625, + "b": 347.69952392578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 83 + ] + } + ], + "orig": "pangrandloave detallod evalvallon %moro rcoarimolhods monionan Secilg Jorhlure work", + "text": "pangrandloave detallod evalvallon %moro rcoarimolhods monionan Secilg Jorhlure work" + }, + { + "self_ref": "#/texts/138", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.1520690917969, + "t": 344.3362731933594, + "r": 527.7802124023438, + "b": 332.3333435058594, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 296 + ] + } + ], + "orig": "Inuhs sechon All Deseni seur8/ asoecis reles00 Perormanoe ouieci celec on DoxclayNet Simamtas In PLoLaynnt oyuato tnn qualmy cuthnlr crodictionsusiramnanavnna prncisicn (TTAP) wch IDovrdaos that rangn trom 0 5ta 005 (nap,o6-00: Ml olue Fnoula Cvurbar uvalaion coou piayIed DY Ihu COCO API/161 ook", + "text": "Inuhs sechon All Deseni seur8/ asoecis reles00 Perormanoe ouieci celec on DoxclayNet Simamtas In PLoLaynnt oyuato tnn qualmy cuthnlr crodictionsusiramnanavnna prncisicn (TTAP) wch IDovrdaos that rangn trom 0 5ta 005 (nap,o6-00: Ml olue Fnoula Cvurbar uvalaion coou piayIed DY Ihu COCO API/161 ook" + }, + { + "self_ref": "#/texts/139", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "section_header", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.2955322265625, + "t": 328.0, + "r": 434.3333435058594, + "b": 321.3333435058594, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 30 + ] + } + ], + "orig": "Baselines for Object Detection", + "text": "Baselines for Object Detection" + }, + { + "self_ref": "#/texts/140", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 365.9697570800781, + "t": 317.6666564941406, + "r": 529.27099609375, + "b": 280.0965881347656, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 825 + ] + } + ], + "orig": "ptesenl baselne expenrnenls (Qvenin MAF) on Mas< R-CNN /121 Fasler F-CNN [11] an1 YOLOvS [13] Bou1 brann anavailang woropomormod AGa Imnoos vith dimonsions 1025 chxrols For tralring onN usodomannolatln Incaso ohcuunourfhunnolulco Dac3 Ohenn Vuruhoninptalunhamagny usnaroA en hn 10?7 loworrnannomap conoutec paicaisehuman anncrbons Aoo-amculeopnnos Ins Cves nacaton thatrhe DocLayNot daasci DOfo s mornwro clagnoo [csoarcncomrurt gap bctwoon human focogniticn and VL aporoaces nlelesuio IharNaska-CNNead Fasler GNincroova comnanen Maseoes nnocauna Ulbi AICBasodnanc scomrorubon oormvod Irom bounon)ooros Ooo{ abuin totcrorcochons Ontho chornnno Mcrocconi YolavSrmrodel does verywell und even Dul-Perdorins selectedlubels such Tedle undpcturl enbeh surcrisio Ta oloandPchre poincant amimemostasiaIN ishinsine documen: Ouau hnne", + "text": "ptesenl baselne expenrnenls (Qvenin MAF) on Mas< R-CNN /121 Fasler F-CNN [11] an1 YOLOvS [13] Bou1 brann anavailang woropomormod AGa Imnoos vith dimonsions 1025 chxrols For tralring onN usodomannolatln Incaso ohcuunourfhunnolulco Dac3 Ohenn Vuruhoninptalunhamagny usnaroA en hn 10?7 loworrnannomap conoutec paicaisehuman anncrbons Aoo-amculeopnnos Ins Cves nacaton thatrhe DocLayNot daasci DOfo s mornwro clagnoo [csoarcncomrurt gap bctwoon human focogniticn and VL aporoaces nlelesuio IharNaska-CNNead Fasler GNincroova comnanen Maseoes nnocauna Ulbi AICBasodnanc scomrorubon oormvod Irom bounon)ooros Ooo{ abuin totcrorcochons Ontho chornnno Mcrocconi YolavSrmrodel does verywell und even Dul-Perdorins selectedlubels such Tedle undpcturl enbeh surcrisio Ta oloandPchre poincant amimemostasiaIN ishinsine documen: Ouau hnne" + }, + { + "self_ref": "#/texts/141", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 303.0059509277344, + "t": 48.90887451171875, + "r": 308.49029541015625, + "b": 39.960079193115234, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ], + "orig": "8", + "text": "8" + }, + { + "self_ref": "#/texts/142", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.67599487304688, + "t": 598.9852294921875, + "r": 346.2541809082031, + "b": 593.6693115234375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 130 + ] + } + ], + "orig": "KDD '22, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", + "text": "KDD '22, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar" + }, + { + "self_ref": "#/texts/143", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.52484130859375, + "t": 586.8209228515625, + "r": 525.9969482421875, + "b": 561.3492431640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 699 + ] + } + ], + "orig": "Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the tripleannotated pages, from which we obtain accuracy ranges. Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurr ence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges. B", + "text": "Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the tripleannotated pages, from which we obtain accuracy ranges. Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurr ence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges. B" + }, + { + "self_ref": "#/texts/144", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.67599487304688, + "t": 347.296630859375, + "r": 108.26393127441406, + "b": 318.76702880859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 46 + ] + } + ], + "orig": "Figure 3: face. The laid te be drawn the respe", + "text": "Figure 3: face. The laid te be drawn the respe" + }, + { + "self_ref": "#/texts/145", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.50696563720703, + "t": 306.8683776855469, + "r": 212.13279724121094, + "b": 277.8305358886719, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 293 + ] + } + ], + "orig": "we distribute d the annotation workload and performed continuous quality contr ols. Phase one and two required a small team of experts only. For phases three and four, a group of 40 dedicated annotators were assembled and supervised. Phase 1: Data selection and preparation. Our inclusion cri-", + "text": "we distribute d the annotation workload and performed continuous quality contr ols. Phase one and two required a small team of experts only. For phases three and four, a group of 40 dedicated annotators were assembled and supervised. Phase 1: Data selection and preparation. Our inclusion cri-" + }, + { + "self_ref": "#/texts/146", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 327.33526611328125, + "t": 415.4449157714844, + "r": 347.025390625, + "b": 375.5401916503906, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 58 + ] + } + ], + "orig": "of pages ed by seerties. For cument figur es or object how", + "text": "of pages ed by seerties. For cument figur es or object how" + }, + { + "self_ref": "#/texts/147", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 223.4002227783203, + "t": 370.67547607421875, + "r": 347.0276794433594, + "b": 280.1531982421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 430 + ] + } + ], + "orig": "d the colfealayout labels. Pageand $_{Title}$. class cificity ed for of the ambiguous, while coverage ensures that all meaningful items on a page can be annotated. We refrained from class labels that are very specific to a document category, such as Abstract in the Scientific Articles category. We also avoided class labels that are tightly linked to the semantics of the text. Labels such as Author and $_{Affiliation}$, as seen", + "text": "d the colfealayout labels. Pageand $_{Title}$. class cificity ed for of the ambiguous, while coverage ensures that all meaningful items on a page can be annotated. We refrained from class labels that are very specific to a document category, such as Abstract in the Scientific Articles category. We also avoided class labels that are tightly linked to the semantics of the text. Labels such as Author and $_{Affiliation}$, as seen" + }, + { + "self_ref": "#/texts/148", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "paragraph", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.67599487304688, + "t": 281.1365966796875, + "r": 504.1103515625, + "b": 213.95611572265625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 737 + ] + } + ], + "orig": "teria for documents were described in Section 3. A large effort went into ensuring that all documents are free to use. The data sources in DocBank, are often only distinguishable by discriminating on $^{3}$https://arxiv.org/ Figure 4: Table 1 from the DocLayNet paper in the original PDF (A), as rendered Markdown (B) and in JSON representation (C). Spanning table cells, such as the multi-column header \"triple interannotator mAP@0.5-0.95 (%)\", is repeated for each column in the Markdown representation (B), which guarantees that every data point can be traced back to row and column headings only by its grid coordinates in the table. In the JSON representation, the span information is reflected in the fields of each table cell (C).", + "text": "teria for documents were described in Section 3. A large effort went into ensuring that all documents are free to use. The data sources in DocBank, are often only distinguishable by discriminating on $^{3}$https://arxiv.org/ Figure 4: Table 1 from the DocLayNet paper in the original PDF (A), as rendered Markdown (B) and in JSON representation (C). Spanning table cells, such as the multi-column header \"triple interannotator mAP@0.5-0.95 (%)\", is repeated for each column in the Markdown representation (B), which guarantees that every data point can be traced back to row and column headings only by its grid coordinates in the table. In the JSON representation, the span information is reflected in the fields of each table cell (C)." + }, + { + "self_ref": "#/texts/149", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 302.54315185546875, + "t": 49.2738037109375, + "r": 308.49029541015625, + "b": 39.96010971069336, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ], + "orig": "9", + "text": "9" + } + ], + "pictures": [ + { + "self_ref": "#/pictures/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "picture", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 261.9665222167969, + "t": 715.8966064453125, + "r": 348.6589660644531, + "b": 627.1333618164062, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": {} + }, + { + "self_ref": "#/pictures/1", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "picture", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 110.07240295410156, + "t": 719.2913208007812, + "r": 501.97247314453125, + "b": 581.2926025390625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 134 + ] + } + ], + "captions": [ + { + "$ref": "#/texts/30" + } + ], + "references": [], + "footnotes": [], + "data": {} + }, + { + "self_ref": "#/pictures/2", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "picture", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 223.45220947265625, + "t": 606.3411865234375, + "r": 277.1463623046875, + "b": 563.2439575195312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 84 + ] + } + ], + "captions": [ + { + "$ref": "#/texts/112" + } + ], + "references": [], + "footnotes": [], + "data": {} + }, + { + "self_ref": "#/pictures/3", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "picture", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 224.67953491210938, + "t": 560.5714111328125, + "r": 268.130126953125, + "b": 503.4938049316406, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": {} + }, + { + "self_ref": "#/pictures/4", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "picture", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 279.0318603515625, + "t": 607.0249633789062, + "r": 312.2329406738281, + "b": 562.7503662109375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": {} + }, + { + "self_ref": "#/pictures/5", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "picture", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 269.2328186035156, + "t": 558.8635864257812, + "r": 311.7486877441406, + "b": 502.9947814941406, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": {} + }, + { + "self_ref": "#/pictures/6", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "picture", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 237.64300537109375, + "t": 550.145751953125, + "r": 337.0115966796875, + "b": 477.0093994140625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 393 + ] + } + ], + "captions": [ + { + "$ref": "#/texts/126" + } + ], + "references": [], + "footnotes": [], + "data": {} + }, + { + "self_ref": "#/pictures/7", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "picture", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 110.4301528930664, + "t": 573.9806518554688, + "r": 124.71573638916016, + "b": 559.47119140625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 46 + ] + } + ], + "captions": [ + { + "$ref": "#/texts/144" + } + ], + "references": [], + "footnotes": [], + "data": {} + }, + { + "self_ref": "#/pictures/8", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "picture", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 101.61099243164062, + "t": 471.4068603515625, + "r": 338.6873474121094, + "b": 308.3857421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": {} + }, + { + "self_ref": "#/pictures/9", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "picture", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 328.66815185546875, + "t": 558.557861328125, + "r": 544.793212890625, + "b": 414.3171081542969, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": {} + } + ], + "tables": [ + { + "self_ref": "#/tables/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "table", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 133.71340942382812, + "t": 635.0601806640625, + "r": 477.5060729980469, + "b": 542.3740844726562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [ + { + "$ref": "#/texts/56" + } + ], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "bbox": { + "l": 134.5240020751953, + "t": 626.0866088867188, + "r": 153.90126037597656, + "b": 617.1800537109375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "CPU", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 207.9080047607422, + "t": 631.5416259765625, + "r": 236.12208557128906, + "b": 611.72607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Thread budget", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 272.6700134277344, + "t": 631.5416259765625, + "r": 332.2065124511719, + "b": 622.6350708007812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 5, + "text": "native backend", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 383.2847595214844, + "t": 631.5416259765625, + "r": 456.9681091308594, + "b": 622.6350708007812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 8, + "text": "pypdfium backend", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 249.87600708007812, + "t": 615.8296508789062, + "r": 267.5895080566406, + "b": 606.923095703125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "TTS", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 281.337890625, + "t": 615.8296508789062, + "r": 311.0762634277344, + "b": 606.923095703125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Pages/s", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 329.3974914550781, + "t": 615.8296508789062, + "r": 350.42852783203125, + "b": 606.923095703125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Mem", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 370.552978515625, + "t": 615.8296508789062, + "r": 388.2664794921875, + "b": 606.923095703125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "TTS", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 402.0148620605469, + "t": 615.8296508789062, + "r": 431.75323486328125, + "b": 606.923095703125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "Pages/s", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 450.074462890625, + "t": 615.8296508789062, + "r": 471.1054992675781, + "b": 606.923095703125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "Mem", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 134.5240020751953, + "t": 599.9186401367188, + "r": 195.95338439941406, + "b": 591.0120849609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Apple M3 Max", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 207.9080047607422, + "t": 599.9186401367188, + "r": 212.88929748535156, + "b": 591.0120849609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 248.0771942138672, + "t": 599.9186401367188, + "r": 269.38720703125, + "b": 591.0120849609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "177 s", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 287.49920654296875, + "t": 599.9186401367188, + "r": 304.9337463378906, + "b": 591.0120849609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "1.27", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 323.0360107421875, + "t": 594.463623046875, + "r": 356.79925537109375, + "b": 585.5570678710938, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "6.20 GB", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 368.7539978027344, + "t": 599.9186401367188, + "r": 390.06402587890625, + "b": 591.0120849609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "103 s", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.176025390625, + "t": 599.9186401367188, + "r": 425.6105651855469, + "b": 591.0120849609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "2.18", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 443.7130126953125, + "t": 594.463623046875, + "r": 477.47625732421875, + "b": 585.5570678710938, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 5, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "2.56 GB", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 134.5240020751953, + "t": 589.0096435546875, + "r": 174.6334228515625, + "b": 580.1030883789062, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "(16 cores)", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 207.90802001953125, + "t": 589.0096435546875, + "r": 217.87062072753906, + "b": 580.1030883789062, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "16", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 248.0772247314453, + "t": 589.0096435546875, + "r": 269.3872375488281, + "b": 580.1030883789062, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "167 s", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 287.4992370605469, + "t": 589.0096435546875, + "r": 304.93377685546875, + "b": 580.1030883789062, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "1.34", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 371.2448425292969, + "t": 589.0096435546875, + "r": 387.57354736328125, + "b": 580.1030883789062, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "92 s", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.1662292480469, + "t": 589.0096435546875, + "r": 425.60076904296875, + "b": 580.1030883789062, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "2.45", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 134.5240020751953, + "t": 573.0986328125, + "r": 190.13523864746094, + "b": 553.2830810546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Intel(R) Xeon E5-2690", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 207.9080047607422, + "t": 573.0986328125, + "r": 217.87062072753906, + "b": 553.2830810546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4 16", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 248.0771942138672, + "t": 573.0986328125, + "r": 269.3872375488281, + "b": 553.2830810546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "375 s 244 s", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 287.49920654296875, + "t": 573.0986328125, + "r": 304.93377685546875, + "b": 553.2830810546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "0.60 0.92", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 323.0360107421875, + "t": 567.6436157226562, + "r": 356.79925537109375, + "b": 558.737060546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "6.16 GB", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 368.7539978027344, + "t": 573.0986328125, + "r": 390.064208984375, + "b": 553.2830810546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "239 s 143 s", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.166259765625, + "t": 573.0986328125, + "r": 425.6105651855469, + "b": 553.2830810546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "0.94 1.57", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 443.7130126953125, + "t": 567.6436157226562, + "r": 477.47625732421875, + "b": 558.737060546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "2.42 GB", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 5, + "num_cols": 8, + "grid": [ + [ + { + "bbox": { + "l": 134.5240020751953, + "t": 626.0866088867188, + "r": 153.90126037597656, + "b": 617.1800537109375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "CPU", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 207.9080047607422, + "t": 631.5416259765625, + "r": 236.12208557128906, + "b": 611.72607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Thread budget", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 272.6700134277344, + "t": 631.5416259765625, + "r": 332.2065124511719, + "b": 622.6350708007812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 5, + "text": "native backend", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 272.6700134277344, + "t": 631.5416259765625, + "r": 332.2065124511719, + "b": 622.6350708007812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 5, + "text": "native backend", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 272.6700134277344, + "t": 631.5416259765625, + "r": 332.2065124511719, + "b": 622.6350708007812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 5, + "text": "native backend", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 383.2847595214844, + "t": 631.5416259765625, + "r": 456.9681091308594, + "b": 622.6350708007812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 8, + "text": "pypdfium backend", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 383.2847595214844, + "t": 631.5416259765625, + "r": 456.9681091308594, + "b": 622.6350708007812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 8, + "text": "pypdfium backend", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 383.2847595214844, + "t": 631.5416259765625, + "r": 456.9681091308594, + "b": 622.6350708007812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 8, + "text": "pypdfium backend", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 207.9080047607422, + "t": 631.5416259765625, + "r": 236.12208557128906, + "b": 611.72607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Thread budget", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 249.87600708007812, + "t": 615.8296508789062, + "r": 267.5895080566406, + "b": 606.923095703125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "TTS", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 281.337890625, + "t": 615.8296508789062, + "r": 311.0762634277344, + "b": 606.923095703125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Pages/s", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 329.3974914550781, + "t": 615.8296508789062, + "r": 350.42852783203125, + "b": 606.923095703125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Mem", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 370.552978515625, + "t": 615.8296508789062, + "r": 388.2664794921875, + "b": 606.923095703125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "TTS", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 402.0148620605469, + "t": 615.8296508789062, + "r": 431.75323486328125, + "b": 606.923095703125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "Pages/s", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 450.074462890625, + "t": 615.8296508789062, + "r": 471.1054992675781, + "b": 606.923095703125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "Mem", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 134.5240020751953, + "t": 599.9186401367188, + "r": 195.95338439941406, + "b": 591.0120849609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Apple M3 Max", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 207.9080047607422, + "t": 599.9186401367188, + "r": 212.88929748535156, + "b": 591.0120849609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 248.0771942138672, + "t": 599.9186401367188, + "r": 269.38720703125, + "b": 591.0120849609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "177 s", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 287.49920654296875, + "t": 599.9186401367188, + "r": 304.9337463378906, + "b": 591.0120849609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "1.27", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 323.0360107421875, + "t": 594.463623046875, + "r": 356.79925537109375, + "b": 585.5570678710938, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "6.20 GB", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 368.7539978027344, + "t": 599.9186401367188, + "r": 390.06402587890625, + "b": 591.0120849609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "103 s", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.176025390625, + "t": 599.9186401367188, + "r": 425.6105651855469, + "b": 591.0120849609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "2.18", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 443.7130126953125, + "t": 594.463623046875, + "r": 477.47625732421875, + "b": 585.5570678710938, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 5, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "2.56 GB", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 134.5240020751953, + "t": 589.0096435546875, + "r": 174.6334228515625, + "b": 580.1030883789062, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "(16 cores)", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 207.90802001953125, + "t": 589.0096435546875, + "r": 217.87062072753906, + "b": 580.1030883789062, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "16", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 248.0772247314453, + "t": 589.0096435546875, + "r": 269.3872375488281, + "b": 580.1030883789062, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "167 s", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 287.4992370605469, + "t": 589.0096435546875, + "r": 304.93377685546875, + "b": 580.1030883789062, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "1.34", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 323.0360107421875, + "t": 594.463623046875, + "r": 356.79925537109375, + "b": 585.5570678710938, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "6.20 GB", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 371.2448425292969, + "t": 589.0096435546875, + "r": 387.57354736328125, + "b": 580.1030883789062, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "92 s", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.1662292480469, + "t": 589.0096435546875, + "r": 425.60076904296875, + "b": 580.1030883789062, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "2.45", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 443.7130126953125, + "t": 594.463623046875, + "r": 477.47625732421875, + "b": 585.5570678710938, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 3, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 5, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "2.56 GB", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 134.5240020751953, + "t": 573.0986328125, + "r": 190.13523864746094, + "b": 553.2830810546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Intel(R) Xeon E5-2690", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 207.9080047607422, + "t": 573.0986328125, + "r": 217.87062072753906, + "b": 553.2830810546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "4 16", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 248.0771942138672, + "t": 573.0986328125, + "r": 269.3872375488281, + "b": 553.2830810546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "375 s 244 s", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 287.49920654296875, + "t": 573.0986328125, + "r": 304.93377685546875, + "b": 553.2830810546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "0.60 0.92", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 323.0360107421875, + "t": 567.6436157226562, + "r": 356.79925537109375, + "b": 558.737060546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "6.16 GB", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 368.7539978027344, + "t": 573.0986328125, + "r": 390.064208984375, + "b": 553.2830810546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "239 s 143 s", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 408.166259765625, + "t": 573.0986328125, + "r": 425.6105651855469, + "b": 553.2830810546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "0.94 1.57", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 443.7130126953125, + "t": 567.6436157226562, + "r": 477.47625732421875, + "b": 558.737060546875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "2.42 GB", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/1", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "table", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 125.886474609375, + "t": 505.5043640136719, + "r": 223.0053253173828, + "b": 437.8017578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [ + { + "$ref": "#/texts/122" + } + ], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 156.58157348632812, + "t": 505.0888977050781, + "r": 167.5352020263672, + "b": 499.7922058105469, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "human", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.1665496826172, + "t": 505.0888977050781, + "r": 187.46572875976562, + "b": 499.7922058105469, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 4, + "text": "MRCNN", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 194.09616088867188, + "t": 505.0888977050781, + "r": 206.03860473632812, + "b": 499.7922058105469, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "FRCNN", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 210.296630859375, + "t": 505.0888977050781, + "r": 219.76319885253906, + "b": 499.7922058105469, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "YOLO", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.98147583007812, + "t": 500.404541015625, + "r": 177.79554748535156, + "b": 495.10784912109375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "R50", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.24139404296875, + "t": 500.404541015625, + "r": 189.83763122558594, + "b": 495.10784912109375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "R101", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.26876831054688, + "t": 500.404541015625, + "r": 203.86502075195312, + "b": 495.10784912109375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "R101", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.35777282714844, + "t": 500.404541015625, + "r": 218.7049102783203, + "b": 495.10784912109375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "v5x6", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 128.9252166748047, + "t": 495.5500793457031, + "r": 141.400390625, + "b": 490.2533874511719, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Caption", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 495.5500793457031, + "r": 166.27047729492188, + "b": 490.2533874511719, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "84-89", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 495.5500793457031, + "r": 177.98348999023438, + "b": 490.2533874511719, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "68.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 495.5500793457031, + "r": 189.13641357421875, + "b": 490.2533874511719, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "71.5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 495.5500793457031, + "r": 203.16378784179688, + "b": 490.2533874511719, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "70.1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 495.5500793457031, + "r": 218.1263427734375, + "b": 490.2533874511719, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "77.7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 128.9252166748047, + "t": 490.8657531738281, + "r": 142.81845092773438, + "b": 485.56903076171875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Footnote", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 490.8657531738281, + "r": 166.27047729492188, + "b": 485.56903076171875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "83-91", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 490.8657531738281, + "r": 177.98348999023438, + "b": 485.56903076171875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "70.9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 490.8657531738281, + "r": 189.13641357421875, + "b": 485.56903076171875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "71.8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 490.8657531738281, + "r": 203.16378784179688, + "b": 485.56903076171875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "73.7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 490.8657531738281, + "r": 218.1263427734375, + "b": 485.56903076171875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "77.2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 128.9252166748047, + "t": 486.181396484375, + "r": 141.96762084960938, + "b": 480.88470458984375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Formula", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 486.181396484375, + "r": 166.27047729492188, + "b": 480.88470458984375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "83-85", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 486.181396484375, + "r": 177.98348999023438, + "b": 480.88470458984375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "60.1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 486.181396484375, + "r": 189.13641357421875, + "b": 480.88470458984375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "63.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 486.181396484375, + "r": 203.16378784179688, + "b": 480.88470458984375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "63.5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 486.181396484375, + "r": 218.1263427734375, + "b": 480.88470458984375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "66.2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 128.9252166748047, + "t": 481.4970703125, + "r": 142.97943115234375, + "b": 476.20037841796875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "List-item", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 481.4970703125, + "r": 166.27047729492188, + "b": 476.20037841796875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "87-88", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 481.4970703125, + "r": 177.98348999023438, + "b": 476.20037841796875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "81.2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 481.4970703125, + "r": 189.13641357421875, + "b": 476.20037841796875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "80.8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 481.4970703125, + "r": 203.16378784179688, + "b": 476.20037841796875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "81.0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 481.4970703125, + "r": 218.1263427734375, + "b": 476.20037841796875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "86.2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 128.9252166748047, + "t": 476.812744140625, + "r": 147.10333251953125, + "b": 471.51605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Page-footer", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 476.812744140625, + "r": 166.27047729492188, + "b": 471.51605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "93-94", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 476.812744140625, + "r": 177.98348999023438, + "b": 471.51605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "61.6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 476.812744140625, + "r": 189.13641357421875, + "b": 471.51605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "59.3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 476.812744140625, + "r": 203.16378784179688, + "b": 471.51605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "58.9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 476.812744140625, + "r": 218.1263427734375, + "b": 471.51605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "61.1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 128.9252166748047, + "t": 472.1283874511719, + "r": 148.27993774414062, + "b": 466.83172607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Page-header", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 472.1283874511719, + "r": 166.27047729492188, + "b": 466.83172607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "85-89", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 472.1283874511719, + "r": 177.98348999023438, + "b": 466.83172607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "71.9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 472.1283874511719, + "r": 189.13641357421875, + "b": 466.83172607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "70.0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 472.1283874511719, + "r": 203.16378784179688, + "b": 466.83172607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "72.0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 472.1283874511719, + "r": 218.1263427734375, + "b": 466.83172607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "67.9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 128.9252166748047, + "t": 467.44403076171875, + "r": 140.03213500976562, + "b": 462.1473693847656, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Picture", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 467.44403076171875, + "r": 166.27047729492188, + "b": 462.1473693847656, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "69-71", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 467.44403076171875, + "r": 177.98348999023438, + "b": 462.1473693847656, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "71.7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 467.44403076171875, + "r": 189.13641357421875, + "b": 462.1473693847656, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "72.7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 467.44403076171875, + "r": 218.1263427734375, + "b": 462.1473693847656, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "77.1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 128.9252166748047, + "t": 462.75970458984375, + "r": 152.32334899902344, + "b": 457.4630126953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Section-header", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 462.75970458984375, + "r": 166.27047729492188, + "b": 457.4630126953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "83-84", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 462.75970458984375, + "r": 177.98348999023438, + "b": 457.4630126953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "67.6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 462.75970458984375, + "r": 189.13641357421875, + "b": 457.4630126953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "69.3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 462.75970458984375, + "r": 203.16378784179688, + "b": 457.4630126953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "68.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 462.75970458984375, + "r": 218.1263427734375, + "b": 457.4630126953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "74.6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 128.9252166748047, + "t": 458.07537841796875, + "r": 137.39146423339844, + "b": 452.7786865234375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Table", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 458.07537841796875, + "r": 166.27047729492188, + "b": 452.7786865234375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "77-81", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 458.07537841796875, + "r": 177.98348999023438, + "b": 452.7786865234375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "82.2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 458.07537841796875, + "r": 189.13641357421875, + "b": 452.7786865234375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "82.9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 458.07537841796875, + "r": 203.16378784179688, + "b": 452.7786865234375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "82.2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 458.07537841796875, + "r": 218.1263427734375, + "b": 452.7786865234375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "86.3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 128.9252166748047, + "t": 453.3914489746094, + "r": 135.74728393554688, + "b": 448.09478759765625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Text", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 453.3914489746094, + "r": 166.27047729492188, + "b": 448.09478759765625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "84-86", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 453.3914489746094, + "r": 177.98348999023438, + "b": 448.09478759765625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "84.6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 453.3914489746094, + "r": 189.13641357421875, + "b": 448.09478759765625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "85.8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 453.3914489746094, + "r": 203.16378784179688, + "b": 448.09478759765625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "85.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 448.70709228515625, + "r": 177.98348999023438, + "b": 443.41046142578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "76.7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 448.70709228515625, + "r": 189.13641357421875, + "b": 443.41046142578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "80.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 448.70709228515625, + "r": 203.16378784179688, + "b": 443.41046142578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "79.9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 453.3914489746094, + "r": 218.1263427734375, + "b": 448.09478759765625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "88.1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 128.9252166748047, + "t": 448.70709228515625, + "r": 136.18801879882812, + "b": 443.41046142578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Title", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 448.70709228515625, + "r": 166.27047729492188, + "b": 443.41046142578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "60-72", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 448.70709228515625, + "r": 218.1263427734375, + "b": 443.41046142578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "82.7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 128.9252166748047, + "t": 443.85223388671875, + "r": 133.6125030517578, + "b": 438.5555419921875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "All", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 443.85223388671875, + "r": 166.27047729492188, + "b": 438.5555419921875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "82-83", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 443.85223388671875, + "r": 177.98348999023438, + "b": 438.5555419921875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "72.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 443.85223388671875, + "r": 189.13641357421875, + "b": 438.5555419921875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "73.5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 443.85223388671875, + "r": 203.16378784179688, + "b": 438.5555419921875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "73.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 443.85223388671875, + "r": 218.1263427734375, + "b": 438.5555419921875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "76.8", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 15, + "num_cols": 6, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 156.58157348632812, + "t": 505.0888977050781, + "r": 167.5352020263672, + "b": 499.7922058105469, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "human", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.1665496826172, + "t": 505.0888977050781, + "r": 187.46572875976562, + "b": 499.7922058105469, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 4, + "text": "MRCNN", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 174.1665496826172, + "t": 505.0888977050781, + "r": 187.46572875976562, + "b": 499.7922058105469, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 2, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 4, + "text": "MRCNN", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 194.09616088867188, + "t": 505.0888977050781, + "r": 206.03860473632812, + "b": 499.7922058105469, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "FRCNN", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 210.296630859375, + "t": 505.0888977050781, + "r": 219.76319885253906, + "b": 499.7922058105469, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "YOLO", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 156.58157348632812, + "t": 505.0888977050781, + "r": 167.5352020263672, + "b": 499.7922058105469, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 2, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "human", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.98147583007812, + "t": 500.404541015625, + "r": 177.79554748535156, + "b": 495.10784912109375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "R50", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.24139404296875, + "t": 500.404541015625, + "r": 189.83763122558594, + "b": 495.10784912109375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "R101", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.26876831054688, + "t": 500.404541015625, + "r": 203.86502075195312, + "b": 495.10784912109375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "R101", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.35777282714844, + "t": 500.404541015625, + "r": 218.7049102783203, + "b": 495.10784912109375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "v5x6", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 128.9252166748047, + "t": 495.5500793457031, + "r": 141.400390625, + "b": 490.2533874511719, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Caption", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 495.5500793457031, + "r": 166.27047729492188, + "b": 490.2533874511719, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "84-89", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 495.5500793457031, + "r": 177.98348999023438, + "b": 490.2533874511719, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "68.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 495.5500793457031, + "r": 189.13641357421875, + "b": 490.2533874511719, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "71.5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 495.5500793457031, + "r": 203.16378784179688, + "b": 490.2533874511719, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "70.1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 495.5500793457031, + "r": 218.1263427734375, + "b": 490.2533874511719, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "77.7", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 128.9252166748047, + "t": 490.8657531738281, + "r": 142.81845092773438, + "b": 485.56903076171875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Footnote", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 490.8657531738281, + "r": 166.27047729492188, + "b": 485.56903076171875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "83-91", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 490.8657531738281, + "r": 177.98348999023438, + "b": 485.56903076171875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "70.9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 490.8657531738281, + "r": 189.13641357421875, + "b": 485.56903076171875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "71.8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 490.8657531738281, + "r": 203.16378784179688, + "b": 485.56903076171875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "73.7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 490.8657531738281, + "r": 218.1263427734375, + "b": 485.56903076171875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "77.2", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 128.9252166748047, + "t": 486.181396484375, + "r": 141.96762084960938, + "b": 480.88470458984375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Formula", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 486.181396484375, + "r": 166.27047729492188, + "b": 480.88470458984375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "83-85", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 486.181396484375, + "r": 177.98348999023438, + "b": 480.88470458984375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "60.1", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 486.181396484375, + "r": 189.13641357421875, + "b": 480.88470458984375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "63.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 486.181396484375, + "r": 203.16378784179688, + "b": 480.88470458984375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "63.5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 486.181396484375, + "r": 218.1263427734375, + "b": 480.88470458984375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "66.2", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 128.9252166748047, + "t": 481.4970703125, + "r": 142.97943115234375, + "b": 476.20037841796875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "List-item", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 481.4970703125, + "r": 166.27047729492188, + "b": 476.20037841796875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "87-88", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 481.4970703125, + "r": 177.98348999023438, + "b": 476.20037841796875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "81.2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 481.4970703125, + "r": 189.13641357421875, + "b": 476.20037841796875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "80.8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 481.4970703125, + "r": 203.16378784179688, + "b": 476.20037841796875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "81.0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 481.4970703125, + "r": 218.1263427734375, + "b": 476.20037841796875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "86.2", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 128.9252166748047, + "t": 476.812744140625, + "r": 147.10333251953125, + "b": 471.51605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Page-footer", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 476.812744140625, + "r": 166.27047729492188, + "b": 471.51605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "93-94", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 476.812744140625, + "r": 177.98348999023438, + "b": 471.51605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "61.6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 476.812744140625, + "r": 189.13641357421875, + "b": 471.51605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "59.3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 476.812744140625, + "r": 203.16378784179688, + "b": 471.51605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "58.9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 476.812744140625, + "r": 218.1263427734375, + "b": 471.51605224609375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "61.1", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 128.9252166748047, + "t": 472.1283874511719, + "r": 148.27993774414062, + "b": 466.83172607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Page-header", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 472.1283874511719, + "r": 166.27047729492188, + "b": 466.83172607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "85-89", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 472.1283874511719, + "r": 177.98348999023438, + "b": 466.83172607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "71.9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 472.1283874511719, + "r": 189.13641357421875, + "b": 466.83172607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "70.0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 472.1283874511719, + "r": 203.16378784179688, + "b": 466.83172607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "72.0", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 472.1283874511719, + "r": 218.1263427734375, + "b": 466.83172607421875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "67.9", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 128.9252166748047, + "t": 467.44403076171875, + "r": 140.03213500976562, + "b": 462.1473693847656, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Picture", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 467.44403076171875, + "r": 166.27047729492188, + "b": 462.1473693847656, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "69-71", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 467.44403076171875, + "r": 177.98348999023438, + "b": 462.1473693847656, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "71.7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 467.44403076171875, + "r": 189.13641357421875, + "b": 462.1473693847656, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "72.7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 467.44403076171875, + "r": 218.1263427734375, + "b": 462.1473693847656, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "77.1", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 128.9252166748047, + "t": 462.75970458984375, + "r": 152.32334899902344, + "b": 457.4630126953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Section-header", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 462.75970458984375, + "r": 166.27047729492188, + "b": 457.4630126953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "83-84", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 462.75970458984375, + "r": 177.98348999023438, + "b": 457.4630126953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "67.6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 462.75970458984375, + "r": 189.13641357421875, + "b": 457.4630126953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "69.3", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 462.75970458984375, + "r": 203.16378784179688, + "b": 457.4630126953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "68.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 462.75970458984375, + "r": 218.1263427734375, + "b": 457.4630126953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "74.6", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 128.9252166748047, + "t": 458.07537841796875, + "r": 137.39146423339844, + "b": 452.7786865234375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Table", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 458.07537841796875, + "r": 166.27047729492188, + "b": 452.7786865234375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "77-81", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 458.07537841796875, + "r": 177.98348999023438, + "b": 452.7786865234375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "82.2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 458.07537841796875, + "r": 189.13641357421875, + "b": 452.7786865234375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "82.9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 458.07537841796875, + "r": 203.16378784179688, + "b": 452.7786865234375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "82.2", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 458.07537841796875, + "r": 218.1263427734375, + "b": 452.7786865234375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "86.3", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 128.9252166748047, + "t": 453.3914489746094, + "r": 135.74728393554688, + "b": 448.09478759765625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Text", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 453.3914489746094, + "r": 166.27047729492188, + "b": 448.09478759765625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "84-86", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 453.3914489746094, + "r": 177.98348999023438, + "b": 448.09478759765625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "84.6", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 453.3914489746094, + "r": 189.13641357421875, + "b": 448.09478759765625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "85.8", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 453.3914489746094, + "r": 203.16378784179688, + "b": 448.09478759765625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "85.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 448.70709228515625, + "r": 177.98348999023438, + "b": 443.41046142578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "76.7", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 448.70709228515625, + "r": 189.13641357421875, + "b": 443.41046142578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "80.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 448.70709228515625, + "r": 203.16378784179688, + "b": 443.41046142578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "79.9", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 453.3914489746094, + "r": 218.1263427734375, + "b": 448.09478759765625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "88.1", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 128.9252166748047, + "t": 448.70709228515625, + "r": 136.18801879882812, + "b": 443.41046142578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Title", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 448.70709228515625, + "r": 166.27047729492188, + "b": 443.41046142578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "60-72", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 448.70709228515625, + "r": 218.1263427734375, + "b": 443.41046142578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "82.7", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 128.9252166748047, + "t": 443.85223388671875, + "r": 133.6125030517578, + "b": 438.5555419921875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "All", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 157.84637451171875, + "t": 443.85223388671875, + "r": 166.27047729492188, + "b": 438.5555419921875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "82-83", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.7938232421875, + "t": 443.85223388671875, + "r": 177.98348999023438, + "b": 438.5555419921875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "72.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 182.9467315673828, + "t": 443.85223388671875, + "r": 189.13641357421875, + "b": 438.5555419921875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "73.5", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 196.97412109375, + "t": 443.85223388671875, + "r": 203.16378784179688, + "b": 438.5555419921875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "73.4", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 211.9366455078125, + "t": 443.85223388671875, + "r": 218.1263427734375, + "b": 438.5555419921875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 14, + "end_row_offset_idx": 15, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "76.8", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/2", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "table", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.86639404296875, + "t": 542.9662475585938, + "r": 460.80865478515625, + "b": 450.93499755859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 392.6666564941406, + "t": 541.3333129882812, + "r": 401.3333435058594, + "b": 539.3333129882812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "noun", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.3333435058594, + "t": 542.3333129882812, + "r": 417.6666564941406, + "b": 538.3333129882812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Mrcnn", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 421.0, + "t": 542.3333129882812, + "r": 433.0, + "b": 538.3333129882812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "MaCNN", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 436.6666564941406, + "t": 542.0, + "r": 446.0, + "b": 539.3333129882812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Frcne", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 451.3333435058594, + "t": 542.0, + "r": 458.6666564941406, + "b": 539.3333129882812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "Yolo", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 369.3333435058594, + "t": 528.6666870117188, + "r": 378.6666564941406, + "b": 526.0, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Gaoon", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 369.3333435058594, + "t": 522.0, + "r": 380.0, + "b": 519.3333129882812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Foomolo", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 369.3333435058594, + "t": 514.6666870117188, + "r": 379.3333435058594, + "b": 512.6666870117188, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Foula", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 369.3333435058594, + "t": 508.6666564941406, + "r": 379.3333435058594, + "b": 506.0, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Ust-lern", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 369.3333435058594, + "t": 502.0, + "r": 383.3333435058594, + "b": 499.3333435058594, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Page-locer", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 369.3333435058594, + "t": 494.6666564941406, + "r": 384.6666564941406, + "b": 492.0, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Faqe-haje", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 369.3333435058594, + "t": 488.6666564941406, + "r": 378.0, + "b": 486.0, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Pxlu", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 369.3333435058594, + "t": 482.0, + "r": 387.3333435058594, + "b": 479.3333435058594, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Sonhoade", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 9, + "num_cols": 6, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 392.6666564941406, + "t": 541.3333129882812, + "r": 401.3333435058594, + "b": 539.3333129882812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "noun", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 406.3333435058594, + "t": 542.3333129882812, + "r": 417.6666564941406, + "b": 538.3333129882812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Mrcnn", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 421.0, + "t": 542.3333129882812, + "r": 433.0, + "b": 538.3333129882812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "MaCNN", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 436.6666564941406, + "t": 542.0, + "r": 446.0, + "b": 539.3333129882812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Frcne", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 451.3333435058594, + "t": 542.0, + "r": 458.6666564941406, + "b": 539.3333129882812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "Yolo", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 369.3333435058594, + "t": 528.6666870117188, + "r": 378.6666564941406, + "b": 526.0, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Gaoon", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 369.3333435058594, + "t": 522.0, + "r": 380.0, + "b": 519.3333129882812, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Foomolo", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 369.3333435058594, + "t": 514.6666870117188, + "r": 379.3333435058594, + "b": 512.6666870117188, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Foula", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 369.3333435058594, + "t": 508.6666564941406, + "r": 379.3333435058594, + "b": 506.0, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Ust-lern", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 369.3333435058594, + "t": 502.0, + "r": 383.3333435058594, + "b": 499.3333435058594, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Page-locer", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 369.3333435058594, + "t": 494.6666564941406, + "r": 384.6666564941406, + "b": 492.0, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Faqe-haje", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 369.3333435058594, + "t": 488.6666564941406, + "r": 378.0, + "b": 486.0, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Pxlu", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 369.3333435058594, + "t": 482.0, + "r": 387.3333435058594, + "b": 479.3333435058594, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Sonhoade", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + }, + { + "self_ref": "#/tables/3", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "table", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 110.8310546875, + "t": 560.6348876953125, + "r": 323.9291076660156, + "b": 477.7417297363281, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ], + "captions": [ + { + "$ref": "#/texts/143" + } + ], + "references": [], + "footnotes": [], + "data": { + "table_cells": [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 151.05230712890625, + "t": 553.5693969726562, + "r": 162.6777801513672, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Count", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 180.66891479492188, + "t": 559.1656494140625, + "r": 199.10299682617188, + "b": 552.837890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 5, + "text": "% of Total", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 229.23550415039062, + "t": 559.1656494140625, + "r": 308.0542907714844, + "b": 552.837890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 7, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 12, + "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 553.5693969726562, + "r": 133.57032775878906, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "class label", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 170.37966918945312, + "t": 553.5693969726562, + "r": 180.53993225097656, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Train", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.54818725585938, + "t": 553.5693969726562, + "r": 196.27256774902344, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Test", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 205.99327087402344, + "t": 553.5693969726562, + "r": 212.00518798828125, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Val", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 221.5577850341797, + "t": 553.5693969726562, + "r": 227.15760803222656, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "All", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 236.36549377441406, + "t": 553.5693969726562, + "r": 242.30873107910156, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "Fin", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 249.04408264160156, + "t": 553.5693969726562, + "r": 257.4598388671875, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "Man", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 269.3188171386719, + "t": 553.5693969726562, + "r": 274.7400817871094, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "Sci", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 281.9607849121094, + "t": 553.5693969726562, + "r": 289.8912048339844, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "Law", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 299.0303955078125, + "t": 553.5693969726562, + "r": 305.0469055175781, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "Pat", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 313.22454833984375, + "t": 553.5693969726562, + "r": 320.1980285644531, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "T en", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 547.7698974609375, + "r": 129.63717651367188, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Caption", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 152.03225708007812, + "t": 547.7698974609375, + "r": 162.67787170410156, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "22524", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 547.7698974609375, + "r": 180.5400848388672, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "2.04", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 547.7698974609375, + "r": 196.27272033691406, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "1.77", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 547.7698974609375, + "r": 212.00534057617188, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "2.32", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 547.7698974609375, + "r": 227.15773010253906, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "84-89", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 547.7698974609375, + "r": 242.30885314941406, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "40-61", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 547.7698974609375, + "r": 257.4599609375, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "86-92", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 547.7698974609375, + "r": 274.7402038574219, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "94-99", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 547.7698974609375, + "r": 289.8913269042969, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "95-99", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 547.7698974609375, + "r": 305.0470275878906, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "69-78", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 314.14501953125, + "t": 547.7698974609375, + "r": 320.1981506347656, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "n/a", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 542.1737060546875, + "r": 131.33132934570312, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Footnote", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 154.16119384765625, + "t": 542.1737060546875, + "r": 162.6776885986328, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6318", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 542.1737060546875, + "r": 180.5400848388672, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "0.60", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 542.1737060546875, + "r": 196.27272033691406, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "0.31", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 542.1737060546875, + "r": 212.00534057617188, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "0.58", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 542.1737060546875, + "r": 227.15773010253906, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "83-91", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 236.25572204589844, + "t": 542.1737060546875, + "r": 242.308837890625, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "n/a", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 251.0725860595703, + "t": 542.1737060546875, + "r": 257.4599304199219, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.6760559082031, + "t": 542.1737060546875, + "r": 274.74017333984375, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "62-88", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.8271789550781, + "t": 542.1737060546875, + "r": 289.89129638671875, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "85-94", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 298.9938659667969, + "t": 542.1737060546875, + "r": 305.0469970703125, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "n/a", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.1340026855469, + "t": 542.1737060546875, + "r": 320.1981201171875, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "82-97", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 536.5774536132812, + "r": 130.31483459472656, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Formula", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 152.03225708007812, + "t": 536.5774536132812, + "r": 162.67787170410156, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "25027", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 536.5774536132812, + "r": 180.5400848388672, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "2.25", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 536.5774536132812, + "r": 196.27272033691406, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "1.90", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 536.5774536132812, + "r": 212.00534057617188, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "2.96", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 536.5774536132812, + "r": 227.15773010253906, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "83-85", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 251.40682983398438, + "t": 536.5774536132812, + "r": 257.4599304199219, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "n/a", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.6760559082031, + "t": 536.5774536132812, + "r": 274.74017333984375, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "84-87", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.8271789550781, + "t": 536.5774536132812, + "r": 289.89129638671875, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "86-96", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 314.1449890136719, + "t": 536.5774536132812, + "r": 320.1981201171875, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "n/a", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 530.981201171875, + "r": 131.5236358642578, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "List-item", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 149.9033203125, + "t": 530.981201171875, + "r": 162.67807006835938, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "185660", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.01646423339844, + "t": 530.981201171875, + "r": 180.540283203125, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "17.19", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 186.74908447265625, + "t": 530.981201171875, + "r": 196.2729034423828, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "13.34", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 202.48170471191406, + "t": 530.981201171875, + "r": 212.0055389404297, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "15.82", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 530.981201171875, + "r": 227.15773010253906, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "87-88", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 530.981201171875, + "r": 242.30885314941406, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "74-83", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 530.981201171875, + "r": 257.4599609375, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "90-92", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 530.981201171875, + "r": 274.7402038574219, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "97-97", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 530.981201171875, + "r": 289.8913269042969, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "81-85", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 530.981201171875, + "r": 305.0470275878906, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "75-88", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 530.981201171875, + "r": 320.19818115234375, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "93-95", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 525.3848876953125, + "r": 136.45037841796875, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Page-footer", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 152.03225708007812, + "t": 525.3848876953125, + "r": 162.67787170410156, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "70878", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 525.3848876953125, + "r": 180.5400848388672, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6.51", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 525.3848876953125, + "r": 196.27272033691406, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "5.58", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 525.3848876953125, + "r": 212.00534057617188, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "6.00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 525.3848876953125, + "r": 227.15773010253906, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "93-94", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 525.3848876953125, + "r": 242.30885314941406, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "88-90", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 525.3848876953125, + "r": 257.4599609375, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "95-96", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 268.35284423828125, + "t": 525.3848876953125, + "r": 274.7402038574219, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 525.3848876953125, + "r": 289.8913269042969, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "92-97", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 298.6596374511719, + "t": 525.3848876953125, + "r": 305.0469970703125, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.1340026855469, + "t": 525.3848876953125, + "r": 320.1981506347656, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "96-98", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 519.7886962890625, + "r": 137.85604858398438, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Page-header", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 152.03225708007812, + "t": 519.7886962890625, + "r": 162.67787170410156, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "58022", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 519.7886962890625, + "r": 180.5400848388672, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5.10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 519.7886962890625, + "r": 196.27272033691406, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6.70", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 519.7886962890625, + "r": 212.00534057617188, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "5.06", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 519.7886962890625, + "r": 227.15773010253906, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "85-89", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 519.7886962890625, + "r": 242.30885314941406, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "66-76", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 519.7886962890625, + "r": 257.4599609375, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "90-94", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 262.5469665527344, + "t": 519.7886962890625, + "r": 274.7402038574219, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "98-100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 519.7886962890625, + "r": 289.8913269042969, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "91-92", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 519.7886962890625, + "r": 305.0470275878906, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "97-99", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 519.7886962890625, + "r": 320.19818115234375, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "81-86", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 514.1924438476562, + "r": 128.0025634765625, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Picture", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 152.03225708007812, + "t": 514.1924438476562, + "r": 162.67787170410156, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "45976", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 514.1924438476562, + "r": 180.5400848388672, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "4.21", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 514.1924438476562, + "r": 196.27272033691406, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "2.78", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 514.1924438476562, + "r": 212.00534057617188, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "5.31", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 514.1924438476562, + "r": 227.15773010253906, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "69-71", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 514.1924438476562, + "r": 242.30885314941406, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "56-59", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 514.1924438476562, + "r": 257.4599609375, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "82-86", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 514.1924438476562, + "r": 274.7402038574219, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "69-82", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 514.1924438476562, + "r": 289.8913269042969, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "80-95", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 514.1924438476562, + "r": 305.0470275878906, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "66-71", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 514.1924438476562, + "r": 320.19818115234375, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "59-76", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 508.59619140625, + "r": 142.6866455078125, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Section-header", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 149.9033203125, + "t": 508.59619140625, + "r": 162.67807006835938, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "142884", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.01646423339844, + "t": 508.59619140625, + "r": 180.540283203125, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12.60", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 186.74908447265625, + "t": 508.59619140625, + "r": 196.2729034423828, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "15.77", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 202.48170471191406, + "t": 508.59619140625, + "r": 212.0055389404297, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "12.85", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 508.59619140625, + "r": 227.15773010253906, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "83-84", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 508.59619140625, + "r": 242.30885314941406, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "76-81", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 508.59619140625, + "r": 257.4599609375, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "90-92", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 508.59619140625, + "r": 274.7402038574219, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "94-95", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 508.59619140625, + "r": 289.8913269042969, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "87-94", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 508.59619140625, + "r": 305.0470275878906, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "69-73", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 508.59619140625, + "r": 320.19818115234375, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "78-86", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 502.99993896484375, + "r": 124.84779357910156, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Table", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 152.03225708007812, + "t": 502.99993896484375, + "r": 162.67787170410156, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "34733", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 502.99993896484375, + "r": 180.5400848388672, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3.20", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 502.99993896484375, + "r": 196.27272033691406, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "2.27", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 502.99993896484375, + "r": 212.00534057617188, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "3.60", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 502.99993896484375, + "r": 227.15773010253906, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "77-81", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 502.99993896484375, + "r": 242.30885314941406, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "75-80", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 502.99993896484375, + "r": 257.4599609375, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "83-86", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 502.99993896484375, + "r": 274.7402038574219, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "98-99", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 502.99993896484375, + "r": 289.8913269042969, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "58-80", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 502.99993896484375, + "r": 305.0470275878906, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "79-84", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 502.99993896484375, + "r": 320.19818115234375, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "70-85", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 497.4042053222656, + "r": 122.88350677490234, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Text", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 149.9033203125, + "t": 497.4042053222656, + "r": 162.67807006835938, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "510377", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.01646423339844, + "t": 497.4042053222656, + "r": 180.540283203125, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "45.82", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 186.74908447265625, + "t": 497.4042053222656, + "r": 196.2729034423828, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "49.28", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 202.48170471191406, + "t": 497.4042053222656, + "r": 212.0055389404297, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "45.00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 497.4042053222656, + "r": 227.15773010253906, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "84-86", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 497.4042053222656, + "r": 242.30885314941406, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "81-86", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 497.4042053222656, + "r": 257.4599609375, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "88-93", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 497.4042053222656, + "r": 274.7402038574219, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "89-93", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 497.4042053222656, + "r": 289.8913269042969, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "87-92", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 497.4042053222656, + "r": 305.0470275878906, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "71-79", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 497.4042053222656, + "r": 320.19818115234375, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "87-95", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 491.8079833984375, + "r": 123.4100570678711, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Title", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 154.16119384765625, + "t": 491.8079833984375, + "r": 162.6776885986328, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "5071", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 491.8079833984375, + "r": 180.5400848388672, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "0.47", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 491.8079833984375, + "r": 196.27272033691406, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "0.30", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 491.8079833984375, + "r": 212.00534057617188, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "0.50", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 491.8079833984375, + "r": 227.15773010253906, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "60-72", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 491.8079833984375, + "r": 242.30885314941406, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "24-63", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 491.8079833984375, + "r": 257.4599609375, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "50-63", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 262.5469665527344, + "t": 491.8079833984375, + "r": 274.7402038574219, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "94-100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 491.8079833984375, + "r": 289.8913269042969, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "82-96", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 491.8079833984375, + "r": 305.0470275878906, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "68-79", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 491.8079833984375, + "r": 320.19818115234375, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "24-56", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 114.73330688476562, + "t": 486.0079650878906, + "r": 124.2342529296875, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Total", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 147.7738800048828, + "t": 486.0079650878906, + "r": 162.67774963378906, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "1107470", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 167.76510620117188, + "t": 486.0079650878906, + "r": 180.5398406982422, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "941123", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 185.62684631347656, + "t": 486.0079650878906, + "r": 196.27247619628906, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "99816", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 201.35946655273438, + "t": 486.0079650878906, + "r": 212.00509643554688, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "66531", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 486.0079650878906, + "r": 227.15773010253906, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "82-83", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 486.0079650878906, + "r": 242.30885314941406, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "71-74", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 486.0079650878906, + "r": 257.4599609375, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "79-81", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 486.0079650878906, + "r": 274.7402038574219, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "89-94", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 486.0079650878906, + "r": 289.8913269042969, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "86-91", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 486.0079650878906, + "r": 305.0470275878906, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "71-76", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 486.0079650878906, + "r": 320.19818115234375, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "68-85", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + "num_rows": 14, + "num_cols": 12, + "grid": [ + [ + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 151.05230712890625, + "t": 553.5693969726562, + "r": 162.6777801513672, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "Count", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 180.66891479492188, + "t": 559.1656494140625, + "r": 199.10299682617188, + "b": 552.837890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 5, + "text": "% of Total", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 180.66891479492188, + "t": 559.1656494140625, + "r": 199.10299682617188, + "b": 552.837890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 5, + "text": "% of Total", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 180.66891479492188, + "t": 559.1656494140625, + "r": 199.10299682617188, + "b": 552.837890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 3, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 2, + "end_col_offset_idx": 5, + "text": "% of Total", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 229.23550415039062, + "t": 559.1656494140625, + "r": 308.0542907714844, + "b": 552.837890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 7, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 12, + "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 229.23550415039062, + "t": 559.1656494140625, + "r": 308.0542907714844, + "b": 552.837890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 7, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 12, + "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 229.23550415039062, + "t": 559.1656494140625, + "r": 308.0542907714844, + "b": 552.837890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 7, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 12, + "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 229.23550415039062, + "t": 559.1656494140625, + "r": 308.0542907714844, + "b": 552.837890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 7, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 12, + "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 229.23550415039062, + "t": 559.1656494140625, + "r": 308.0542907714844, + "b": 552.837890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 7, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 12, + "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 229.23550415039062, + "t": 559.1656494140625, + "r": 308.0542907714844, + "b": 552.837890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 7, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 12, + "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 229.23550415039062, + "t": 559.1656494140625, + "r": 308.0542907714844, + "b": 552.837890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 7, + "start_row_offset_idx": 0, + "end_row_offset_idx": 1, + "start_col_offset_idx": 5, + "end_col_offset_idx": 12, + "text": "triple inter-annotator mAP @ 0.5-0.95 (%)", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 553.5693969726562, + "r": 133.57032775878906, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "class label", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 170.37966918945312, + "t": 553.5693969726562, + "r": 180.53993225097656, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "Train", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.54818725585938, + "t": 553.5693969726562, + "r": 196.27256774902344, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "Test", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 205.99327087402344, + "t": 553.5693969726562, + "r": 212.00518798828125, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "Val", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 221.5577850341797, + "t": 553.5693969726562, + "r": 227.15760803222656, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "All", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 236.36549377441406, + "t": 553.5693969726562, + "r": 242.30873107910156, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "Fin", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 249.04408264160156, + "t": 553.5693969726562, + "r": 257.4598388671875, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "Man", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 269.3188171386719, + "t": 553.5693969726562, + "r": 274.7400817871094, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "Sci", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 281.9607849121094, + "t": 553.5693969726562, + "r": 289.8912048339844, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "Law", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 299.0303955078125, + "t": 553.5693969726562, + "r": 305.0469055175781, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "Pat", + "column_header": true, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 313.22454833984375, + "t": 553.5693969726562, + "r": 320.1980285644531, + "b": 547.2415771484375, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 1, + "end_row_offset_idx": 2, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "T en", + "column_header": true, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 547.7698974609375, + "r": 129.63717651367188, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Caption", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 152.03225708007812, + "t": 547.7698974609375, + "r": 162.67787170410156, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "22524", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 547.7698974609375, + "r": 180.5400848388672, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "2.04", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 547.7698974609375, + "r": 196.27272033691406, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "1.77", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 547.7698974609375, + "r": 212.00534057617188, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "2.32", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 547.7698974609375, + "r": 227.15773010253906, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "84-89", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 547.7698974609375, + "r": 242.30885314941406, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "40-61", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 547.7698974609375, + "r": 257.4599609375, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "86-92", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 547.7698974609375, + "r": 274.7402038574219, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "94-99", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 547.7698974609375, + "r": 289.8913269042969, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "95-99", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 547.7698974609375, + "r": 305.0470275878906, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "69-78", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 314.14501953125, + "t": 547.7698974609375, + "r": 320.1981506347656, + "b": 541.442138671875, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 2, + "end_row_offset_idx": 3, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "n/a", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 542.1737060546875, + "r": 131.33132934570312, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Footnote", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 154.16119384765625, + "t": 542.1737060546875, + "r": 162.6776885986328, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "6318", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 542.1737060546875, + "r": 180.5400848388672, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "0.60", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 542.1737060546875, + "r": 196.27272033691406, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "0.31", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 542.1737060546875, + "r": 212.00534057617188, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "0.58", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 542.1737060546875, + "r": 227.15773010253906, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "83-91", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 236.25572204589844, + "t": 542.1737060546875, + "r": 242.308837890625, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "n/a", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 251.0725860595703, + "t": 542.1737060546875, + "r": 257.4599304199219, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.6760559082031, + "t": 542.1737060546875, + "r": 274.74017333984375, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "62-88", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.8271789550781, + "t": 542.1737060546875, + "r": 289.89129638671875, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "85-94", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 298.9938659667969, + "t": 542.1737060546875, + "r": 305.0469970703125, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "n/a", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.1340026855469, + "t": 542.1737060546875, + "r": 320.1981201171875, + "b": 535.8458251953125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 3, + "end_row_offset_idx": 4, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "82-97", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 536.5774536132812, + "r": 130.31483459472656, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Formula", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 152.03225708007812, + "t": 536.5774536132812, + "r": 162.67787170410156, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "25027", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 536.5774536132812, + "r": 180.5400848388672, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "2.25", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 536.5774536132812, + "r": 196.27272033691406, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "1.90", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 536.5774536132812, + "r": 212.00534057617188, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "2.96", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 536.5774536132812, + "r": 227.15773010253906, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "83-85", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 251.40682983398438, + "t": 536.5774536132812, + "r": 257.4599304199219, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "n/a", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.6760559082031, + "t": 536.5774536132812, + "r": 274.74017333984375, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "84-87", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.8271789550781, + "t": 536.5774536132812, + "r": 289.89129638671875, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "86-96", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 314.1449890136719, + "t": 536.5774536132812, + "r": 320.1981201171875, + "b": 530.2496337890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 4, + "end_row_offset_idx": 5, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "n/a", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 530.981201171875, + "r": 131.5236358642578, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "List-item", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 149.9033203125, + "t": 530.981201171875, + "r": 162.67807006835938, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "185660", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.01646423339844, + "t": 530.981201171875, + "r": 180.540283203125, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "17.19", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 186.74908447265625, + "t": 530.981201171875, + "r": 196.2729034423828, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "13.34", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 202.48170471191406, + "t": 530.981201171875, + "r": 212.0055389404297, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "15.82", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 530.981201171875, + "r": 227.15773010253906, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "87-88", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 530.981201171875, + "r": 242.30885314941406, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "74-83", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 530.981201171875, + "r": 257.4599609375, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "90-92", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 530.981201171875, + "r": 274.7402038574219, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "97-97", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 530.981201171875, + "r": 289.8913269042969, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "81-85", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 530.981201171875, + "r": 305.0470275878906, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "75-88", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 530.981201171875, + "r": 320.19818115234375, + "b": 524.6533203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 5, + "end_row_offset_idx": 6, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "93-95", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 525.3848876953125, + "r": 136.45037841796875, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Page-footer", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 152.03225708007812, + "t": 525.3848876953125, + "r": 162.67787170410156, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "70878", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 525.3848876953125, + "r": 180.5400848388672, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "6.51", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 525.3848876953125, + "r": 196.27272033691406, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "5.58", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 525.3848876953125, + "r": 212.00534057617188, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "6.00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 525.3848876953125, + "r": 227.15773010253906, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "93-94", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 525.3848876953125, + "r": 242.30885314941406, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "88-90", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 525.3848876953125, + "r": 257.4599609375, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "95-96", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 268.35284423828125, + "t": 525.3848876953125, + "r": 274.7402038574219, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 525.3848876953125, + "r": 289.8913269042969, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "92-97", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 298.6596374511719, + "t": 525.3848876953125, + "r": 305.0469970703125, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.1340026855469, + "t": 525.3848876953125, + "r": 320.1981506347656, + "b": 519.05712890625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 6, + "end_row_offset_idx": 7, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "96-98", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 519.7886962890625, + "r": 137.85604858398438, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Page-header", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 152.03225708007812, + "t": 519.7886962890625, + "r": 162.67787170410156, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "58022", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 519.7886962890625, + "r": 180.5400848388672, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "5.10", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 519.7886962890625, + "r": 196.27272033691406, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "6.70", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 519.7886962890625, + "r": 212.00534057617188, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "5.06", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 519.7886962890625, + "r": 227.15773010253906, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "85-89", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 519.7886962890625, + "r": 242.30885314941406, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "66-76", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 519.7886962890625, + "r": 257.4599609375, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "90-94", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 262.5469665527344, + "t": 519.7886962890625, + "r": 274.7402038574219, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "98-100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 519.7886962890625, + "r": 289.8913269042969, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "91-92", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 519.7886962890625, + "r": 305.0470275878906, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "97-99", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 519.7886962890625, + "r": 320.19818115234375, + "b": 513.4608764648438, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 7, + "end_row_offset_idx": 8, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "81-86", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 514.1924438476562, + "r": 128.0025634765625, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Picture", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 152.03225708007812, + "t": 514.1924438476562, + "r": 162.67787170410156, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "45976", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 514.1924438476562, + "r": 180.5400848388672, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "4.21", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 514.1924438476562, + "r": 196.27272033691406, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "2.78", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 514.1924438476562, + "r": 212.00534057617188, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "5.31", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 514.1924438476562, + "r": 227.15773010253906, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "69-71", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 514.1924438476562, + "r": 242.30885314941406, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "56-59", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 514.1924438476562, + "r": 257.4599609375, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "82-86", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 514.1924438476562, + "r": 274.7402038574219, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "69-82", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 514.1924438476562, + "r": 289.8913269042969, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "80-95", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 514.1924438476562, + "r": 305.0470275878906, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "66-71", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 514.1924438476562, + "r": 320.19818115234375, + "b": 507.8646545410156, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 8, + "end_row_offset_idx": 9, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "59-76", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 508.59619140625, + "r": 142.6866455078125, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Section-header", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 149.9033203125, + "t": 508.59619140625, + "r": 162.67807006835938, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "142884", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.01646423339844, + "t": 508.59619140625, + "r": 180.540283203125, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "12.60", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 186.74908447265625, + "t": 508.59619140625, + "r": 196.2729034423828, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "15.77", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 202.48170471191406, + "t": 508.59619140625, + "r": 212.0055389404297, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "12.85", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 508.59619140625, + "r": 227.15773010253906, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "83-84", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 508.59619140625, + "r": 242.30885314941406, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "76-81", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 508.59619140625, + "r": 257.4599609375, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "90-92", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 508.59619140625, + "r": 274.7402038574219, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "94-95", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 508.59619140625, + "r": 289.8913269042969, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "87-94", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 508.59619140625, + "r": 305.0470275878906, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "69-73", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 508.59619140625, + "r": 320.19818115234375, + "b": 502.26837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 9, + "end_row_offset_idx": 10, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "78-86", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 502.99993896484375, + "r": 124.84779357910156, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Table", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 152.03225708007812, + "t": 502.99993896484375, + "r": 162.67787170410156, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "34733", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 502.99993896484375, + "r": 180.5400848388672, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "3.20", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 502.99993896484375, + "r": 196.27272033691406, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "2.27", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 502.99993896484375, + "r": 212.00534057617188, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "3.60", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 502.99993896484375, + "r": 227.15773010253906, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "77-81", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 502.99993896484375, + "r": 242.30885314941406, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "75-80", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 502.99993896484375, + "r": 257.4599609375, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "83-86", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 502.99993896484375, + "r": 274.7402038574219, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "98-99", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 502.99993896484375, + "r": 289.8913269042969, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "58-80", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 502.99993896484375, + "r": 305.0470275878906, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "79-84", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 502.99993896484375, + "r": 320.19818115234375, + "b": 496.6721496582031, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 10, + "end_row_offset_idx": 11, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "70-85", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 497.4042053222656, + "r": 122.88350677490234, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Text", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 149.9033203125, + "t": 497.4042053222656, + "r": 162.67807006835938, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "510377", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 171.01646423339844, + "t": 497.4042053222656, + "r": 180.540283203125, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "45.82", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 186.74908447265625, + "t": 497.4042053222656, + "r": 196.2729034423828, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "49.28", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 202.48170471191406, + "t": 497.4042053222656, + "r": 212.0055389404297, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "45.00", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 497.4042053222656, + "r": 227.15773010253906, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "84-86", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 497.4042053222656, + "r": 242.30885314941406, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "81-86", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 497.4042053222656, + "r": 257.4599609375, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "88-93", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 497.4042053222656, + "r": 274.7402038574219, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "89-93", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 497.4042053222656, + "r": 289.8913269042969, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "87-92", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 497.4042053222656, + "r": 305.0470275878906, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "71-79", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 497.4042053222656, + "r": 320.19818115234375, + "b": 491.076416015625, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 11, + "end_row_offset_idx": 12, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "87-95", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 491.8079833984375, + "r": 123.4100570678711, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Title", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 154.16119384765625, + "t": 491.8079833984375, + "r": 162.6776885986328, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "5071", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 173.1453857421875, + "t": 491.8079833984375, + "r": 180.5400848388672, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "0.47", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 188.87802124023438, + "t": 491.8079833984375, + "r": 196.27272033691406, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "0.30", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 204.6106414794922, + "t": 491.8079833984375, + "r": 212.00534057617188, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "0.50", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 491.8079833984375, + "r": 227.15773010253906, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "60-72", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 491.8079833984375, + "r": 242.30885314941406, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "24-63", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 491.8079833984375, + "r": 257.4599609375, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "50-63", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 262.5469665527344, + "t": 491.8079833984375, + "r": 274.7402038574219, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "94-100", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 491.8079833984375, + "r": 289.8913269042969, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "82-96", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 491.8079833984375, + "r": 305.0470275878906, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "68-79", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 491.8079833984375, + "r": 320.19818115234375, + "b": 485.4801940917969, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 12, + "end_row_offset_idx": 13, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "24-56", + "column_header": false, + "row_header": false, + "row_section": false + } + ], + [ + { + "bbox": { + "l": 114.73330688476562, + "t": 486.0079650878906, + "r": 124.2342529296875, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 0, + "end_col_offset_idx": 1, + "text": "Total", + "column_header": false, + "row_header": true, + "row_section": false + }, + { + "bbox": { + "l": 147.7738800048828, + "t": 486.0079650878906, + "r": 162.67774963378906, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 1, + "end_col_offset_idx": 2, + "text": "1107470", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 167.76510620117188, + "t": 486.0079650878906, + "r": 180.5398406982422, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 2, + "end_col_offset_idx": 3, + "text": "941123", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 185.62684631347656, + "t": 486.0079650878906, + "r": 196.27247619628906, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 3, + "end_col_offset_idx": 4, + "text": "99816", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 201.35946655273438, + "t": 486.0079650878906, + "r": 212.00509643554688, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 4, + "end_col_offset_idx": 5, + "text": "66531", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 217.09361267089844, + "t": 486.0079650878906, + "r": 227.15773010253906, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 5, + "end_col_offset_idx": 6, + "text": "82-83", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 232.24473571777344, + "t": 486.0079650878906, + "r": 242.30885314941406, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 6, + "end_col_offset_idx": 7, + "text": "71-74", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 247.39585876464844, + "t": 486.0079650878906, + "r": 257.4599609375, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 7, + "end_col_offset_idx": 8, + "text": "79-81", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 264.67608642578125, + "t": 486.0079650878906, + "r": 274.7402038574219, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 8, + "end_col_offset_idx": 9, + "text": "89-94", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 279.82720947265625, + "t": 486.0079650878906, + "r": 289.8913269042969, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 9, + "end_col_offset_idx": 10, + "text": "86-91", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 294.98291015625, + "t": 486.0079650878906, + "r": 305.0470275878906, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 10, + "end_col_offset_idx": 11, + "text": "71-76", + "column_header": false, + "row_header": false, + "row_section": false + }, + { + "bbox": { + "l": 310.134033203125, + "t": 486.0079650878906, + "r": 320.19818115234375, + "b": 479.68017578125, + "coord_origin": "BOTTOMLEFT" + }, + "row_span": 1, + "col_span": 1, + "start_row_offset_idx": 13, + "end_row_offset_idx": 14, + "start_col_offset_idx": 11, + "end_col_offset_idx": 12, + "text": "68-85", + "column_header": false, + "row_header": false, + "row_section": false + } + ] + ] + } + } + ], + "key_value_items": [], + "pages": { + "1": { + "size": { + "width": 612.0, + "height": 792.0 + }, + "page_no": 1 + }, + "2": { + "size": { + "width": 612.0, + "height": 792.0 + }, + "page_no": 2 + }, + "3": { + "size": { + "width": 612.0, + "height": 792.0 + }, + "page_no": 3 + }, + "4": { + "size": { + "width": 612.0, + "height": 792.0 + }, + "page_no": 4 + }, + "5": { + "size": { + "width": 612.0, + "height": 792.0 + }, + "page_no": 5 + }, + "6": { + "size": { + "width": 612.0, + "height": 792.0 + }, + "page_no": 6 + }, + "7": { + "size": { + "width": 612.0, + "height": 792.0 + }, + "page_no": 7 + }, + "8": { + "size": { + "width": 612.0, + "height": 792.0 + }, + "page_no": 8 + }, + "9": { + "size": { + "width": 612.0, + "height": 792.0 + }, + "page_no": 9 + } } } diff --git a/test/data/chunker/0_out_chunks.json b/test/data/chunker/0_out_chunks.json new file mode 100644 index 00000000..8cfdaf79 --- /dev/null +++ b/test/data/chunker/0_out_chunks.json @@ -0,0 +1,3858 @@ +{ + "root": [ + { + "text": "arXiv:2408.09869v3 [cs.CL] 30 Aug 2024", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_header", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 17.088111877441406, + "t": 583.2296752929688, + "r": 36.339778900146484, + "b": 231.99996948242188, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 38 + ] + } + ] + } + ] + } + }, + { + "text": "Version 1.0", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/2", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 282.772216796875, + "t": 512.7218017578125, + "r": 328.8624572753906, + "b": 503.340087890625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 11 + ] + } + ] + } + ], + "headings": [ + "Docling Technical Report" + ] + } + }, + { + "text": "Christoph Auer Maksym Lysak Ahmed Nassar Michele Dolfi Nikolaos Livathinos Panos Vagenas Cesar Berrospi Ramis Matteo Omenetti Fabian Lindlbauer Kasper Dinkla Lokesh Mishra Yusik Kim Shubham Gupta Rafael Teixeira de Lima Valery Weber Lucas Morin Ingmar Meijer Viktor Kuropiatnyk Peter W. J. Staar", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/3", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 113.4512939453125, + "t": 482.4101257324219, + "r": 498.396728515625, + "b": 439.45928955078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 295 + ] + } + ] + } + ], + "headings": [ + "Docling Technical Report" + ] + } + }, + { + "text": "AI4K Group, IBM Research Ruschlikon, Switzerland", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/4", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 248.43727111816406, + "t": 428.638427734375, + "r": 362.8905029296875, + "b": 407.99810791015625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 48 + ] + } + ] + } + ], + "headings": [ + "Docling Technical Report" + ] + } + }, + { + "text": "This technical report introduces Docling , an easy to use, self-contained, MITlicensed open-source package for PDF document conversion. It is powered by state-of-the-art specialized AI models for layout analysis (DocLayNet) and table structure recognition (TableFormer), and runs efficiently on commodity hardware in a small resource budget. The code interface allows for easy extensibility and addition of new features and models.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/6", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 142.92593383789062, + "t": 364.814697265625, + "r": 468.3847351074219, + "b": 300.651123046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 431 + ] + } + ] + } + ], + "headings": [ + "Docling Technical Report", + "Abstract" + ] + } + }, + { + "text": "Converting PDF documents back into a machine-processable format has been a major challenge for decades due to their huge variability in formats, weak standardization and printing-optimized characteristic, which discards most structural features and metadata. With the advent of LLMs and popular application patterns such as retrieval-augmented generation (RAG), leveraging the rich content embedded in PDFs has become ever more relevant. In the past decade, several powerful document understanding solutions have emerged on the market, most of which are commercial software, cloud offerings [3] and most recently, multi-modal vision-language models. As of today, only a handful of open-source tools cover PDF conversion, leaving a significant feature and quality gap to proprietary solutions.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/8", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 106.98738098144531, + "t": 240.2642822265625, + "r": 504.3785400390625, + "b": 142.53631591796875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 792 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "With Docling , we open-source a very capable and efficient document conversion tool which builds on the powerful, specialized AI models and datasets for layout analysis and table structure recognition we developed and presented in the recent past [12, 13, 9]. Docling is designed as a simple, self-contained python library with permissive license, running entirely locally on commodity hardware. Its code architecture allows for easy extensibility and addition of new features and models.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/9", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 107.0031967163086, + "t": 136.7283935546875, + "r": 504.04998779296875, + "b": 83.30133056640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 488 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "Docling Technical Report", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/10", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 107.10411071777344, + "t": 58.48394775390625, + "r": 200.8249969482422, + "b": 49.8505859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 24 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "1", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/11", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 303.50897216796875, + "t": 49.50579833984375, + "r": 308.4902648925781, + "b": 39.960147857666016, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "Here is what Docling delivers today:", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/12", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.48941802978516, + "t": 717.5628662109375, + "r": 253.97195434570312, + "b": 707.6951293945312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 36 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "\u00b7 Converts PDF documents to JSON or Markdown format, stable and lightning fast\n\u00b7 Understands detailed page layout, reading order, locates figures and recovers table structures\n\u00b7 Extracts metadata from the document, such as title, authors, references and language\n\u00b7 Optionally applies OCR, e.g. for scanned PDFs\n\u00b7 Can be configured to be optimal for batch-mode (i.e high throughput, low time-to-solution) or interactive mode (compromise on efficiency, low time-to-solution)\n\u00b7 Can leverage different accelerators (GPU, MPS, etc).", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/13", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.6504669189453, + "t": 696.156494140625, + "r": 468.3969421386719, + "b": 686.3217163085938, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 78 + ] + } + ] + }, + { + "self_ref": "#/texts/14", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.72218322753906, + "t": 681.3009643554688, + "r": 504.0032653808594, + "b": 660.819091796875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 96 + ] + } + ] + }, + { + "self_ref": "#/texts/15", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.9065399169922, + "t": 655.3751220703125, + "r": 480.8502502441406, + "b": 645.7429809570312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 86 + ] + } + ] + }, + { + "self_ref": "#/texts/16", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.8793487548828, + "t": 640.9143676757812, + "r": 333.46343994140625, + "b": 630.7002563476562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 47 + ] + } + ] + }, + { + "self_ref": "#/texts/17", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 135.0067901611328, + "t": 626.0984497070312, + "r": 504.003173828125, + "b": 604.8719482421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 161 + ] + } + ] + }, + { + "self_ref": "#/texts/18", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.7841339111328, + "t": 600.127685546875, + "r": 355.41107177734375, + "b": 590.395751953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 54 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "To use Docling, you can simply install the docling package from PyPI. Documentation and examples are available in our GitHub repository at github.com/DS4SD/docling. All required model assets 1 are downloaded to a local huggingface datasets cache on first use, unless you choose to pre-install the model assets in advance.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/20", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.22560119628906, + "t": 548.7847900390625, + "r": 504.00341796875, + "b": 506.27606201171875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 321 + ] + } + ] + } + ], + "headings": [ + "2 Getting Started" + ] + } + }, + { + "text": "Docling provides an easy code interface to convert PDF documents from file system, URLs or binary streams, and retrieve the output in either JSON or Markdown format. For convenience, separate methods are offered to convert single documents or batches of documents. A basic usage example is illustrated below. Further examples are available in the Doclign code repository.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/21", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.38473510742188, + "t": 499.5434875488281, + "r": 504.0034484863281, + "b": 456.7132263183594, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 371 + ] + } + ] + } + ], + "headings": [ + "2 Getting Started" + ] + } + }, + { + "text": "from docling.document_converter import DocumentConverter source = \"https :// arxiv.org/pdf /2206.01062\" # PDF path or URL converter = DocumentConverter () result = converter.convert_single(source) print(result.render_as_markdown ()) # output: \"## DocLayNet: A Large Human -Annotated Dataset for Document -Layout Analysis [...]\"", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/22", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "code", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.45667266845703, + "t": 449.7299499511719, + "r": 491.58642578125, + "b": 380.3858642578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 327 + ] + } + ] + } + ], + "headings": [ + "2 Getting Started" + ] + } + }, + { + "text": "Optionally, you can configure custom pipeline features and runtime options, such as turning on or off features (e.g. OCR, table structure recognition), enforcing limits on the input document size, and defining the budget of CPU threads. Advanced usage examples and options are documented in the README file. Docling also provides a Dockerfile to demonstrate how to install and run it inside a container.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/23", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.32361602783203, + "t": 368.8786926269531, + "r": 504.3451843261719, + "b": 315.56304931640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 403 + ] + } + ] + } + ], + "headings": [ + "2 Getting Started" + ] + } + }, + { + "text": "Docling implements a linear pipeline of operations, which execute sequentially on each given document (see Fig. 1). Each document is first parsed by a PDF backend, which retrieves the programmatic text tokens, consisting of string content and its coordinates on the page, and also renders a bitmap image of each page to support downstream operations. Then, the standard model pipeline applies a sequence of AI models independently on every page in the document to extract features and content, such as layout and table structures. Finally, the results from all pages are aggregated and passed through a post-processing stage, which augments metadata, detects the document language, infers reading-order and eventually assembles a typed document object which can be serialized to JSON or Markdown.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/25", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.176025390625, + "t": 273.72723388671875, + "r": 504.06005859375, + "b": 176.83807373046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 796 + ] + } + ] + } + ], + "headings": [ + "3 Processing pipeline" + ] + } + }, + { + "text": "Two basic requirements to process PDF documents in our pipeline are a) to retrieve all text content and their geometric coordinates on each page and b) to render the visual representation of each page as it would appear in a PDF viewer. Both these requirements are encapsulated in Docling's PDF backend interface. While there are several open-source PDF parsing libraries available for python, we faced major obstacles with all of them for different reasons, among which were restrictive", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/27", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.26972198486328, + "t": 142.07904052734375, + "r": 504.2434997558594, + "b": 87.39227294921875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 487 + ] + } + ] + } + ], + "headings": [ + "3.1 PDF backends" + ] + } + }, + { + "text": "$^{1}$see huggingface.co/ds4sd/docling-models/", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/28", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "footnote", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 120.65299987792969, + "t": 78.96942138671875, + "r": 276.9403076171875, + "b": 69.9141845703125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 46 + ] + } + ] + } + ], + "headings": [ + "3.1 PDF backends" + ] + } + }, + { + "text": "2", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/29", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 302.96832275390625, + "t": 49.7403564453125, + "r": 308.49029541015625, + "b": 39.960079193115234, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "3.1 PDF backends" + ] + } + }, + { + "text": "Figure 1: Sketch of Docling's default processing pipeline. The inner part of the model pipeline is easily customizable and extensible.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/30", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.11122131347656, + "t": 570.7063598632812, + "r": 504.00335693359375, + "b": 550.3002319335938, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 134 + ] + } + ] + } + ], + "headings": [ + "3.1 PDF backends" + ] + } + }, + { + "text": "licensing (e.g. pymupdf [7]), poor speed or unrecoverable quality issues, such as merged text cells across far-apart text tokens or table columns (pypdfium, PyPDF) [15, 14].", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/31", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.37481689453125, + "t": 525.6080932617188, + "r": 504.0033264160156, + "b": 504.8570861816406, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 173 + ] + } + ] + } + ], + "headings": [ + "3.1 PDF backends" + ] + } + }, + { + "text": "We therefore decided to provide multiple backend choices, and additionally open-source a custombuilt PDF parser, which is based on the low-level qpdf [4] library. It is made available in a separate package named docling-parse and powers the default PDF backend in Docling. As an alternative, we provide a PDF backend relying on pypdfium , which may be a safe backup choice in certain cases, e.g. if issues are seen with particular font encodings.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/32", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.10971069335938, + "t": 498.21685791015625, + "r": 504.0033874511719, + "b": 443.9909973144531, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 446 + ] + } + ] + } + ], + "headings": [ + "3.1 PDF backends" + ] + } + }, + { + "text": "As part of Docling, we initially release two highly capable AI models to the open-source community, which have been developed and published recently by our team. The first model is a layout analysis model, an accurate object-detector for page elements [13]. The second model is TableFormer [12, 9], a state-of-the-art table structure recognition model. We provide the pre-trained weights (hosted on huggingface) and a separate package for the inference code as docling-ibm-models . Both models are also powering the open-access deepsearch-experience, our cloud-native service for knowledge exploration tasks.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/34", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.07593536376953, + "t": 406.1695251464844, + "r": 504.1148681640625, + "b": 330.2677307128906, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 608 + ] + } + ] + } + ], + "headings": [ + "3.2 AI models" + ] + } + }, + { + "text": "Our layout analysis model is an object-detector which predicts the bounding-boxes and classes of various elements on the image of a given page. Its architecture is derived from RT-DETR [16] and re-trained on DocLayNet [13], our popular human-annotated dataset for document-layout analysis, among other proprietary datasets. For inference, our implementation relies on the onnxruntime [5].", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/36", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.1727294921875, + "t": 294.7471923828125, + "r": 504.1613464355469, + "b": 251.51837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 388 + ] + } + ] + } + ], + "headings": [ + "Layout Analysis Model" + ] + } + }, + { + "text": "The Docling pipeline feeds page images at 72 dpi resolution, which can be processed on a single CPU with sub-second latency. All predicted bounding-box proposals for document elements are post-processed to remove overlapping proposals based on confidence and size, and then intersected with the text tokens in the PDF to group them into meaningful and complete units such as paragraphs, section titles, list items, captions, figures or tables.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/37", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.23725891113281, + "t": 245.4161376953125, + "r": 504.00347900390625, + "b": 191.62884521484375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 443 + ] + } + ] + } + ], + "headings": [ + "Layout Analysis Model" + ] + } + }, + { + "text": "The TableFormer model [12], first published in 2022 and since refined with a custom structure token language [9], is a vision-transformer model for table structure recovery. It can predict the logical row and column structure of a given table based on an input image, and determine which table cells belong to column headers, row headers or the table body. Compared to earlier approaches, TableFormer handles many characteristics of tables, such as partial or no borderlines, empty cells, rows or columns, cell spans and hierarchy both on column-heading or row-heading level, tables with inconsistent indentation or alignment and other complexities. For inference, our implementation relies on PyTorch [2].", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/39", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.22769927978516, + "t": 156.10821533203125, + "r": 504.01800537109375, + "b": 69.84173583984375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 706 + ] + } + ] + } + ], + "headings": [ + "Table Structure Recognition" + ] + } + }, + { + "text": "3", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/40", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 302.7810974121094, + "t": 49.40008544921875, + "r": 308.4903259277344, + "b": 39.96010971069336, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "Table Structure Recognition" + ] + } + }, + { + "text": "The Docling pipeline feeds all table objects detected in the layout analysis to the TableFormer model, by providing an image-crop of the table and the included text cells. TableFormer structure predictions are matched back to the PDF cells in post-processing to avoid expensive re-transcription text in the table image. Typical tables require between 2 and 6 seconds to be processed on a standard CPU, strongly depending on the amount of included table cells.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/41", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.23402404785156, + "t": 717.677001953125, + "r": 504.0035095214844, + "b": 664.2490844726562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 459 + ] + } + ] + } + ], + "headings": [ + "Table Structure Recognition" + ] + } + }, + { + "text": "Docling provides optional support for OCR, for example to cover scanned PDFs or content in bitmaps images embedded on a page. In our initial release, we rely on EasyOCR [1], a popular thirdparty OCR library with support for many languages. Docling, by default, feeds a high-resolution page image (216 dpi) to the OCR engine, to allow capturing small print detail in decent quality. While EasyOCR delivers reasonable transcription quality, we observe that it runs fairly slow on CPU (upwards of 30 seconds per page).", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/43", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.0999526977539, + "t": 632.9981689453125, + "r": 504.00347900390625, + "b": 568.0103759765625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 515 + ] + } + ] + } + ], + "headings": [ + "OCR" + ] + } + }, + { + "text": "We are actively seeking collaboration from the open-source community to extend Docling with additional OCR backends and speed improvements.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/44", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.48332214355469, + "t": 561.5487670898438, + "r": 504.0033874511719, + "b": 540.876953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 139 + ] + } + ] + } + ], + "headings": [ + "OCR" + ] + } + }, + { + "text": "In the final pipeline stage, Docling assembles all prediction results produced on each page into a well-defined datatype that encapsulates a converted document, as defined in the auxiliary package docling-core . The generated document object is passed through a post-processing model which leverages several algorithms to augment features, such as detection of the document language, correcting the reading order, matching figures with captions and labelling metadata such as title, authors and references. The final output can then be serialized to JSON or transformed into a Markdown representation at the users request.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/46", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.259033203125, + "t": 506.85528564453125, + "r": 504.2517395019531, + "b": 431.21771240234375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 622 + ] + } + ] + } + ], + "headings": [ + "3.3 Assembly" + ] + } + }, + { + "text": "Docling provides a straight-forward interface to extend its capabilities, namely the model pipeline. A model pipeline constitutes the central part in the processing, following initial document parsing and preceding output assembly, and can be fully customized by sub-classing from an abstract baseclass ( BaseModelPipeline ) or cloning the default model pipeline. This effectively allows to fully customize the chain of models, add or replace models, and introduce additional pipeline configuration parameters. To use a custom model pipeline, the custom pipeline class to instantiate can be provided as an argument to the main document conversion methods. We invite everyone in the community to propose additional or alternative models and improvements.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/48", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.01625061035156, + "t": 397.58544921875, + "r": 504.00347900390625, + "b": 311.05523681640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 753 + ] + } + ] + } + ], + "headings": [ + "3.4 Extensibility" + ] + } + }, + { + "text": "Implementations of model classes must satisfy the python Callable interface. The __call__ method must accept an iterator over page objects, and produce another iterator over the page objects which were augmented with the additional features predicted by the model, by extending the provided PagePredictions data model accordingly.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/49", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 106.94336700439453, + "t": 304.5326232910156, + "r": 504.0707092285156, + "b": 262.160400390625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 330 + ] + } + ] + } + ], + "headings": [ + "3.4 Extensibility" + ] + } + }, + { + "text": "In this section, we establish some reference numbers for the processing speed of Docling and the resource budget it requires. All tests in this section are run with default options on our standard test set distributed with Docling, which consists of three papers from arXiv and two IBM Redbooks, with a total of 225 pages. Measurements were taken using both available PDF backends on two different hardware systems: one MacBook Pro M3 Max, and one bare-metal server running Ubuntu 20.04 LTS on an Intel Xeon E5-2690 CPU. For reproducibility, we fixed the thread budget (through setting OMP NUM THREADS environment variable ) once to 4 (Docling default) and once to 16 (equal to full core count on the test hardware). All results are shown in Table 1.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/51", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.0430679321289, + "t": 221.5301513671875, + "r": 504.22869873046875, + "b": 135.16595458984375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 750 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ] + } + }, + { + "text": "If you need to run Docling in very low-resource environments, please consider configuring the pypdfium backend. While it is faster and more memory efficient than the default docling-parse backend, it will come at the expense of worse quality results, especially in table structure recovery.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/52", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.19568634033203, + "t": 128.8489990234375, + "r": 504.0033874511719, + "b": 96.76458740234375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 290 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ] + } + }, + { + "text": "Establishing GPU acceleration support for the AI models is currently work-in-progress and largely untested, but may work implicitly when CUDA is available and discovered by the onnxruntime and", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/53", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.47733306884766, + "t": 90.18896484375, + "r": 504.123046875, + "b": 69.5284423828125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 192 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ] + } + }, + { + "text": "4", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/54", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 302.41058349609375, + "t": 49.65472412109375, + "r": 308.49029541015625, + "b": 39.960079193115234, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ] + } + }, + { + "text": "torch runtimes backing the Docling pipeline. We will deliver updates on this topic at in a future version of this report.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/55", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.42681121826172, + "t": 717.5958862304688, + "r": 504.0035400390625, + "b": 696.97607421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 121 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ] + } + }, + { + "text": "Table 1: Runtime characteristics of Docling with the standard model pipeline and settings, on our test dataset of 225 pages, on two different systems. OCR is disabled. We show the time-to-solution (TTS), computed throughput in pages per second, and the peak memory used (resident set size) for both the Docling-native PDF backend and for the pypdfium backend, using 4 and 16 threads.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/56", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.0246810913086, + "t": 686.1126708984375, + "r": 504.30712890625, + "b": 643.7755126953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 383 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ] + } + }, + { + "text": "Thanks to the high-quality, richly structured document conversion achieved by Docling, its output qualifies for numerous downstream applications. For example, Docling can provide a base for detailed enterprise document search, passage retrieval or classification use-cases, or support knowledge extraction pipelines, allowing specific treatment of different structures in the document, such as tables, figures, section structure or references. For popular generative AI application patterns, such as retrieval-augmented generation (RAG), we provide quackling , an open-source package which capitalizes on Docling's feature-rich document output to enable document-native optimized vector embedding and chunking. It plugs in seamlessly with LLM frameworks such as LlamaIndex [8]. Since Docling is fast, stable and cheap to run, it also makes for an excellent choice to build document-derived datasets. With its powerful table structure recognition, it provides significant benefit to automated knowledge-base construction [11, 10]. Docling is also integrated within the open IBM data prep kit [6], which implements scalable data transforms to build large-scale multi-modal training datasets.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/58", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.10533142089844, + "t": 504.97296142578125, + "r": 504.0229187011719, + "b": 364.4931335449219, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1189 + ] + } + ] + } + ], + "headings": [ + "5 Applications" + ] + } + }, + { + "text": "Docling is designed to allow easy extension of the model library and pipelines. In the future, we plan to extend Docling with several more models, such as a figure-classifier model, an equationrecognition model, a code-recognition model and more. This will help improve the quality of conversion for specific types of content, as well as augment extracted document metadata with additional information. Further investment into testing and optimizing GPU acceleration as well as improving the Docling-native PDF backend are on our roadmap, too.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/60", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 106.92281341552734, + "t": 323.5386657714844, + "r": 504.00347900390625, + "b": 258.76641845703125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 543 + ] + } + ] + } + ], + "headings": [ + "6 Future work and contributions" + ] + } + }, + { + "text": "We encourage everyone to propose or implement additional features and models, and will gladly take your inputs and contributions under review . The codebase of Docling is open for use and contribution, under the MIT license agreement and in alignment with our contributing guidelines included in the Docling repository. If you use Docling in your projects, please consider citing this technical report.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/61", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.04397583007812, + "t": 252.4183349609375, + "r": 504.0430908203125, + "b": 198.77685546875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 402 + ] + } + ] + } + ], + "headings": [ + "6 Future work and contributions" + ] + } + }, + { + "text": "[1] J. AI. Easyocr: Ready-to-use ocr with 80+ supported languages. https://github.com/ JaidedAI/EasyOCR , 2024. Version: 1.7.0.\n[2] J. Ansel, E. Yang, H. He, N. Gimelshein, A. Jain, M. Voznesensky, B. Bao, P. Bell, D. Berard, E. Burovski, G. Chauhan, A. Chourdia, W. Constable, A. Desmaison, Z. DeVito, E. Ellison, W. Feng, J. Gong, M. Gschwind, B. Hirsh, S. Huang, K. Kalambarkar, L. Kirsch, M. Lazos, M. Lezcano, Y. Liang, J. Liang, Y. Lu, C. Luk, B. Maher, Y. Pan, C. Puhrsch, M. Reso, M. Saroufim, M. Y. Siraichi, H. Suk, M. Suo, P. Tillet, E. Wang, X. Wang, W. Wen, S. Zhang, X. Zhao, K. Zhou, R. Zou, A. Mathews, G. Chanan, P. Wu, and S. Chintala. Pytorch 2: Faster", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/63", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 112.33451843261719, + "t": 163.731201171875, + "r": 504.0009460449219, + "b": 142.08197021484375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 127 + ] + } + ] + }, + { + "self_ref": "#/texts/64", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 112.45421600341797, + "t": 134.16204833984375, + "r": 504.0035095214844, + "b": 69.84818267822266, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 543 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "5", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/65", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 302.7286376953125, + "t": 49.4200439453125, + "r": 308.49029541015625, + "b": 39.96018600463867, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "machine learning through dynamic python bytecode transformation and graph compilation. In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2 (ASPLOS '24) . ACM, 4 2024. doi: 10.1145/3620665.3640366. URL https://pytorch.org/assets/pytorch2-2.pdf .", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/66", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 129.0050048828125, + "t": 717.4641723632812, + "r": 504.0033264160156, + "b": 674.812744140625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 331 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[3] C. Auer, M. Dolfi, A. Carvalho, C. B. Ramis, and P. W. Staar. Delivering document conversion as a cloud service with high throughput and responsiveness. In 2022 IEEE 15th International Conference on Cloud Computing (CLOUD) , pages 363-373. IEEE, 2022.\n[4] J. Berkenbilt. Qpdf: A content-preserving pdf document transformer, 2024. URL https: //github.com/qpdf/qpdf .\n[5] O. R. developers. Onnx runtime. https://onnxruntime.ai/ , 2024. Version: 1.18.1.\n[6] IBM. Data Prep Kit: a community project to democratize and accelerate unstructured data preparation for LLM app developers, 2024. URL https://github.com/IBM/ data-prep-kit .\n[7] A. S. Inc. PyMuPDF, 2024. URL https://github.com/pymupdf/PyMuPDF .\n[8] J. Liu. LlamaIndex, 11 2022. URL https://github.com/jerryjliu/llama_index .\n[9] M. Lysak, A. Nassar, N. Livathinos, C. Auer, and P. Staar. Optimized Table Tokenization for Table Structure Recognition. In Document Analysis and Recognition - ICDAR 2023: 17th International Conference, San Jos'e, CA, USA, August 21-26, 2023, Proceedings, Part II , pages 37-50, Berlin, Heidelberg, Aug. 2023. Springer-Verlag. ISBN 978-3-031-41678-1. doi: 10. 1007/978-3-031-41679-8 3. URL https://doi.org/10.1007/978-3-031-41679-8_3 .\n[10] L. Mishra, S. Dhibi, Y. Kim, C. Berrospi Ramis, S. Gupta, M. Dolfi, and P. Staar. Statements: Universal information extraction from tables with large language models for ESG KPIs. In D. Stammbach, J. Ni, T. Schimanski, K. Dutia, A. Singh, J. Bingler, C. Christiaen, N. Kushwaha, V. Muccione, S. A. Vaghefi, and M. Leippold, editors, Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024) , pages 193-214, Bangkok, Thailand, Aug. 2024. Association for Computational Linguistics. URL https://aclanthology.org/2024.climatenlp-1.15 .\n[11] L. Morin, V. Weber, G. I. Meijer, F. Yu, and P. W. J. Staar. Patcid: an open-access dataset of chemical structures in patent documents. Nature Communications , 15(1):6532, August 2024. ISSN 2041-1723. doi: 10.1038/s41467-024-50779-y. URL https://doi.org/10.1038/ s41467-024-50779-y .\n[12] A. Nassar, N. Livathinos, M. Lysak, and P. Staar. Tableformer: Table structure understanding with transformers. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition , pages 4614-4623, 2022.\n[13] B. Pfitzmann, C. Auer, M. Dolfi, A. S. Nassar, and P. Staar. Doclaynet: a large humanannotated dataset for document-layout segmentation. pages 3743-3751, 2022.\n[14] pypdf Maintainers. pypdf: A Pure-Python PDF Library, 2024. URL https://github.com/ py-pdf/pypdf .\n[15] P. Team. PyPDFium2: Python bindings for PDFium, 2024. URL https://github.com/ pypdfium2-team/pypdfium2 .\n[16] Y. Zhao, W. Lv, S. Xu, J. Wei, G. Wang, Q. Dang, Y. Liu, and J. Chen. Detrs beat yolos on real-time object detection, 2023.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/67", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.47968292236328, + "t": 665.970458984375, + "r": 504.3585510253906, + "b": 634.421630859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 255 + ] + } + ] + }, + { + "self_ref": "#/texts/68", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.59274291992188, + "t": 625.3558349609375, + "r": 504.00018310546875, + "b": 603.854736328125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 113 + ] + } + ] + }, + { + "self_ref": "#/texts/69", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.65106964111328, + "t": 595.5201416015625, + "r": 478.88665771484375, + "b": 585.318359375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 84 + ] + } + ] + }, + { + "self_ref": "#/texts/70", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.5077896118164, + "t": 576.7722778320312, + "r": 504.0283508300781, + "b": 544.3335571289062, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 177 + ] + } + ] + }, + { + "self_ref": "#/texts/71", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.71062469482422, + "t": 536.3712768554688, + "r": 447.4246826171875, + "b": 526.034423828125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 70 + ] + } + ] + }, + { + "self_ref": "#/texts/72", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.72732543945312, + "t": 516.6817016601562, + "r": 483.91107177734375, + "b": 506.7769470214844, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 79 + ] + } + ] + }, + { + "self_ref": "#/texts/73", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.50459289550781, + "t": 498.0171203613281, + "r": 504.004638671875, + "b": 444.5917053222656, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 439 + ] + } + ] + }, + { + "self_ref": "#/texts/74", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.49420928955078, + "t": 435.72955322265625, + "r": 504.1082458496094, + "b": 359.86444091796875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 581 + ] + } + ] + }, + { + "self_ref": "#/texts/75", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.34581756591797, + "t": 351.3507995605469, + "r": 504.6417541503906, + "b": 308.78851318359375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 288 + ] + } + ] + }, + { + "self_ref": "#/texts/76", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.38827514648438, + "t": 299.4344177246094, + "r": 504.3544616699219, + "b": 268.1841125488281, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 226 + ] + } + ] + }, + { + "self_ref": "#/texts/77", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.36676788330078, + "t": 258.790283203125, + "r": 504.00341796875, + "b": 238.3961181640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 164 + ] + } + ] + }, + { + "self_ref": "#/texts/78", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.28363800048828, + "t": 229.4072265625, + "r": 504.00091552734375, + "b": 207.166748046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 102 + ] + } + ] + }, + { + "self_ref": "#/texts/79", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.2214584350586, + "t": 199.6893310546875, + "r": 504.0008850097656, + "b": 177.491455078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 109 + ] + } + ] + }, + { + "self_ref": "#/texts/80", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.28424072265625, + "t": 169.70806884765625, + "r": 504.0033264160156, + "b": 148.91436767578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 128 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "6", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/81", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 302.7389221191406, + "t": 49.36236572265625, + "r": 308.5960998535156, + "b": 39.96012496948242, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "In this section, we illustrate a few examples of Docling' s output in Markdown and JSON.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/83", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 107.6931381225586, + "t": 694.013671875, + "r": 463.7545471191406, + "b": 684.3182373046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 88 + ] + } + ] + } + ], + "headings": [ + "Appendix" + ] + } + }, + { + "text": "Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/85", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 138.0285186767578, + "t": 650.9168701171875, + "r": 176.45944213867188, + "b": 631.6739501953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 73 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/86", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 199.22952270507812, + "t": 650.9168701171875, + "r": 237.34890747070312, + "b": 631.6729125976562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 71 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/87", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 260.11895751953125, + "t": 650.9168701171875, + "r": 298.3296203613281, + "b": 631.549072265625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 70 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/88", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 168.40359497070312, + "t": 629.259521484375, + "r": 206.98048400878906, + "b": 609.97509765625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 72 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/89", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 229.48968505859375, + "t": 629.259521484375, + "r": 267.6090393066406, + "b": 610.0166015625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 68 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present $_{DocLayNet}$, a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/91", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 123.66893768310547, + "t": 602.5093994140625, + "r": 214.2318878173828, + "b": 500.3504333496094, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1599 + ] + } + ] + } + ], + "headings": [ + "ABSTRACT" + ] + } + }, + { + "text": "\u00b7 Information systems \u2192 Document structure ; \u00b7 Applied computing \u2192 Document analysis ; \u00b7 Computing methodologies \u2192 Machine learning ; Computer vision ; $_{Object detection}$;", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/93", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 124.05064392089844, + "t": 490.005126953125, + "r": 215.08236694335938, + "b": 476.94268798828125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 174 + ] + } + ] + } + ], + "headings": [ + "CCS CONCEPTS" + ] + } + }, + { + "text": "Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profi t or commercial advantage and that copies bear this notice and the full citation on thefirst page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s). KDD '22, August 14-18, 2022, Washington, DC, USA \u00a9 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08. https://doi.org/10.1145/3534678.3539043", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/94", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 123.8716049194336, + "t": 464.7064514160156, + "r": 214.06785583496094, + "b": 436.57623291015625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 566 + ] + } + ] + } + ], + "headings": [ + "CCS CONCEPTS" + ] + } + }, + { + "text": "Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/96", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.2007141113281, + "t": 657.4287109375, + "r": 433.130126953125, + "b": 653.031005859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 73 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/97", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.6015930175781, + "t": 648.9207153320312, + "r": 432.7991943359375, + "b": 645.91748046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 71 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/98", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.18927001953125, + "t": 641.90869140625, + "r": 429.5950012207031, + "b": 637.8482666015625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 70 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/99", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.2640075683594, + "t": 633.8328857421875, + "r": 436.4726867675781, + "b": 629.6668090820312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 72 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/100", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.2624206542969, + "t": 625.7568359375, + "r": 427.5014953613281, + "b": 621.548583984375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 68 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large groundtruth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/102", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 328.861083984375, + "t": 604.5524291992188, + "r": 528.3615112304688, + "b": 549.0685424804688, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1594 + ] + } + ] + } + ], + "headings": [ + "ABSTRACT" + ] + } + }, + { + "text": "$_{\u00b7 Information systems }$\u2192$_{ Document structure ; \u00b7 Applied computing }$ \u2192$_{ Document analysis ; \u00b7 Computing methodologies }$\u2192$_{ Machine learning ;}$ Computer vision ; Object detection ;", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/104", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.4852600097656, + "t": 532.8919067382812, + "r": 516.2509155273438, + "b": 523.6624755859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 191 + ] + } + ] + } + ], + "headings": [ + "CCS CONCEPTS" + ] + } + }, + { + "text": "Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/105", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.1643371582031, + "t": 519.994873046875, + "r": 527.3062133789062, + "b": 506.2882080078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 397 + ] + } + ] + } + ], + "headings": [ + "CCS CONCEPTS" + ] + } + }, + { + "text": "KDD '22, August 14-18, 2022, Washington, DC, USA \u00a9 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08. https://doi.org/10.1145/3534678.3539043", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/106", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.1140441894531, + "t": 502.5775146484375, + "r": 513.2442016601562, + "b": 493.3287353515625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 168 + ] + } + ] + } + ], + "headings": [ + "CCS CONCEPTS" + ] + } + }, + { + "text": "Figure 1: Four examples of complex page layouts across different document categories", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/107", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.0572509765625, + "t": 490.3890686035156, + "r": 445.8473205566406, + "b": 486.1141662597656, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 84 + ] + } + ] + } + ], + "headings": [ + "CCS CONCEPTS" + ] + } + }, + { + "text": "PDF document conversion, layout segmentation, object-detection, data set, Machine Learning", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/109", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.092529296875, + "t": 469.5487365722656, + "r": 454.5943603515625, + "b": 465.4438781738281, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 90 + ] + } + ] + } + ], + "headings": [ + "KEYWORDS" + ] + } + }, + { + "text": "Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Washington, DC, USA. ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/ 3534678.3539043", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/111", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 328.9222412109375, + "t": 448.7705383300781, + "r": 528.159423828125, + "b": 435.41400146484375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 374 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "Figure 1: Four examples of complex page layouts across different document categories", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/112", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 221.84927368164062, + "t": 499.2803955078125, + "r": 312.25115966796875, + "b": 490.75177001953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 84 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "PDF document conversion, layout segmentation, object-detection, data set, Machine Learning", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/114", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 222.00753784179688, + "t": 474.62298583984375, + "r": 312.0212097167969, + "b": 465.4729919433594, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 90 + ] + } + ] + } + ], + "headings": [ + "KEYWORDS" + ] + } + }, + { + "text": "Birgit Pfitzmann, Christoph Auer, Michele Dolfi , Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Wash-$_{ington, DC, USA.}$ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/ 3534678.3539043", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/116", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 221.68344116210938, + "t": 458.718994140625, + "r": 312.1560974121094, + "b": 436.15557861328125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 380 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "1 INTRODUCTION", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/117", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.6015930175781, + "t": 428.9794921875, + "r": 373.37646484375, + "b": 423.8311462402344, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 14 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "Despite the substantial improvements achieved with machine-learning (ML) approaches and deep neural networks in recent years, document conversion remains a challenging problem, as demonstrated by the numerous public competitions held on this topic [1-4]. The challenge originates from the huge variability in PDF documents regarding layout, language and formats (scanned, programmatic or a combination of both). Engineering a single ML model that can be applied on all types of documents and provides high-quality layout segmentation remains to this day extremely challenging [5]. To highlight the variability in document layouts, we show a few example documents from the DocLayNet dataset in Figure 1. Figure 2: Title page of the DocLayNet paper (arxiv .org/pdf/2206.01062) - left PDF, right rendered Markdown. If recognized, metadata such as authors are appearing first under the title. Text content inside figures is currently dropped, the caption is retained and linked to the figure in the JSON representation (not shown).", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/118", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 107.31889343261719, + "t": 420.2637939453125, + "r": 527.5916137695312, + "b": 377.62860107421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1027 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "7", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/119", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 302.8258056640625, + "t": 49.2652587890625, + "r": 308.49029541015625, + "b": 39.960079193115234, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "arXiv:2206.01062v1 [cs.CV] 2 Jun 2022", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/120", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 110.2352066040039, + "t": 618.2011108398438, + "r": 118.32157135009766, + "b": 492.749267578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 37 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "KDD '22, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/121", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 122.18534851074219, + "t": 563.207763671875, + "r": 338.8071594238281, + "b": 558.6549682617188, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 130 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/122", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 122.11329650878906, + "t": 552.1026611328125, + "r": 226.37594604492188, + "b": 509.48504638671875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 489 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "to avoid this at any cost in order to have clear, unbiased baseline numbers for human document-layout annotation. Third, we introduced the feature of snapping boxes around text segments to obtain a pixel-accurate annotation and again reduce time and effort. The CCS annotation tool automatically shrinks every user-drawn box to the minimum bounding-box around the enclosed text-cells for all purely text-based segments, which excludes only Table and $_{Picture}$. For the latter, we instructed annotation staffto minimise inclusion of surrounding whitespace while including all graphical lines. A downside of snapping boxes to enclosed text cells is that some wrongly parsed PDF pages cannot be annotated correctly and need to be skipped. Fourth, we established a way toflag pages as rejected for cases where no valid annotation according to the label guidelines could be achieved. Example cases for this would be PDF pages that render incorrectly or contain layouts that are impossible to capture with non-overlapping rectangles. Such rejected pages are not contained in thefinal dataset. With all these measures in place, experienced annotation staffmanaged to annotate a single page in a typical timeframe of 20s to 60s, depending on its complexity.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/123", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 121.85212707519531, + "t": 431.1610107421875, + "r": 226.33633422851562, + "b": 341.54669189453125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1252 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "The primary goal of DocLayNet is to obtain high-quality ML models capable of accurate document-layout analysis on a wide variety of challenging layouts. As discussed in Section 2, object detection models are currently the easiest to use, due to the standardisation of ground-truth data in COCO format [16] and the availability of general frameworks such as detectron2 [17]. Furthermore, baseline numbers in PubLayNet and DocBank were obtained using standard object detection models such as Mask R-CNN and Faster R-CNN. As such, we will relate to these object detection methods in this", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/125", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 122.00563049316406, + "t": 327.5806884765625, + "r": 226.2816162109375, + "b": 284.8097229003906, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 584 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "Figure 3: Page 6 of the DocLayNet paper. If recognized, metadata such as authors are appearing first under the title. Elements recognized as page headers or footers are suppressed in Markdown to deliver uninterrupted content in reading order. Tables are inserted in reading order. The paragraph in \"5. Experiments\" wrapping over the column end is broken up in two and interrupted by the table.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/126", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 107.26910400390625, + "t": 267.0020751953125, + "r": 504.2988586425781, + "b": 224.93768310546875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 393 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "Figure 5: Prediction performance (mAP@0.5-0.95) of a Mask R-CNN network with ResNet50 backbone trained on increasing fractions of the DocLayNet dataset. The learning curv eflattens around the 80% mark, indicating that increasing the size of the DocLayNet dataset with similar data will not yield significantly better predictions.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/127", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 235.38954162597656, + "t": 469.9726867675781, + "r": 339.28778076171875, + "b": 441.4075927734375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 329 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "paper and leave the detailed evaluation of more recent methods mentioned in Section 2 for future work.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/128", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 234.98081970214844, + "t": 425.5683898925781, + "r": 338.644775390625, + "b": 415.5873718261719, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 102 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "In this section, we will present several aspects related to the performance of object detection models on DocLayNet. Similarly as in PubLayNet, we will evaluate the quality of their predictions using mean average precision (mAP) with 10 overlaps that range from 0.5 to 0.95 in steps of 0.05 (mAP@0.5-0.95). These scores are computed by leveraging the evaluation code provided by the COCO API [16].", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/129", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 234.98487854003906, + "t": 416.19970703125, + "r": 338.76287841796875, + "b": 382.79742431640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 397 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "In Table 2, we present baseline experiments (given in mAP) on Mask R-CNN [12], Faster R-CNN [11], and YOLOv5 [13]. Both training and evaluation were performed on RGB images with dimensions of $^{1025}$\u00d71025 pixels. For training, we only used one annotation in case of redundantly annotated pages. As one can observe, the variation in mAP between the models is rather low, but overall between 6 and 10% lower than the mAP computed from the pairwise human annotations on triple-annotated pages. This gives a good indication that the DocLayNet dataset poses a worthwhile challenge for the research community to close the gap between human recognition and ML approaches. It is interesting to see that Mask R-CNN and Faster R-CNN produce very comparable mAP scores, indicating that pixel-based image segmentation derived from bounding-boxes does not help to obtain better predictions. On the other hand, the more recent Yolov5x model does very well and even out-performs humans on selected labels such as $_{Text}$, Table and $_{Picture}$. This is not entirely surprising, as and Picture are abundant and the most visually distinctive in a document.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/131", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 235.06893920898438, + "t": 370.8502197265625, + "r": 338.89947509765625, + "b": 285.920654296875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1144 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "Prediclion Derormance (up80.5-0.85 ohobeci detecion lalo ks Doclaynal Lest saL Ine VACNN (Mask R-CNNI and FACNN (Faster A-CNM) modcs mith PosNc: 50 PosNo: 101 backtone woro trainod based on Enc nchwwcrk achrocturos tom Ihc Oeronhroase a-CNn aso rioi-Fpn Jx, FasieA-Cnn a1o1-FPN Jx), wilh delaui conlwuralions The YoUg mpomorcabon utilzod w2s YoloSyb(13| modos woro inbalsod usino cro-trunodmonhts hron Coco 2017 datasor", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/132", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.3333435058594, + "t": 563.0970458984375, + "r": 527.1106567382812, + "b": 547.0772705078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 419 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "iD avod Ihbs arcost cha unbasndbasolino numoc human cocumnnt-Laycut annotalion; Thrd Inirooucod leatura 0i snapoina Doxes around lerl scainunis cblan & pixel-accuiale annolaton and aJan feduce Bifre and elonThe CCS annoinbon aloMalca shruks Ovory Usor-drawnboro mnmum boundino-borarounaIho onclosod coxt-colls Purolytort basud scoitontwhich uxclldcs Ort Tatlo and Picluo latsor Inssucicdannjlabon sha mnim so inclusion Suitcurding mlospeco whloIncvon Oenoncang doans d0 oisnaocmnbors Onchse Ihal So10 wioogly Daisoc Pogcs Cannol be annotalcd coTcCEY and nccd supocd Foudn Oshdned Wuyio(aq Dagcs (ccclod Cases whcion valid anncuabon eccofding abeiqu Oelines coukbe acheneu Eamnole Case, flis wouk PDF peoe3 Ihal rendernnccrrecUy contanlavuta hat Imnosshk cantra milh Vananonnyogannio{ Suchiceciodoaoos not coralnon Ihofnn hr Aroknacoarreehetyn annollca slall nluuocd unnoln sina \" Puou lypical Lmnetamre 0l 20s 10 605 cecendnc conoanty", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/133", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 365.95367431640625, + "t": 447.0, + "r": 530.2679443359375, + "b": 405.3583984375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 934 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "Ine crimary goal OocVAYNo cblan hion-quality Modols AccuaiodoaMoiuvana4s WMeVanalon chalcnonglayoul: Cecurdg echon Doicdi Delccion modcb rtene Casistlo Usc, Quulo Hhndandiubon ground-vuth data COCO lornat [16] and avaladloy enetal Irarnenoiks uch derectrcnz7] Furnemmcre, baseline nmnoe < I Putun Notand DocBank calanodusnsundad coict dosnchonmodols such Mas< A CNN and Fasior A CNN SuEna blraomhdelecfa nonInr Canacle", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/135", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.0, + "t": 391.0, + "r": 529.8655395507812, + "b": 370.37261962890625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 418 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "Fauri Prco chon ocrloianC( 005-095) ola Mask A-CNN ncthoik ilh AcsNciSo backbono brainod on incrcasing Iracbons oi DocLaynei calasot Tne loannp auro altons around Ih0 \u20ac03 noicahino Ihal inxreasing /e 520 Q Iho DocL\u00f8y Nel dalasot Amardaen nol Ycid sn: dorOocC Chons LAD", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/136", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 365.9671936035156, + "t": 367.0, + "r": 528.6666870117188, + "b": 354.9878845214844, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 268 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "pangrandloave detallod evalvallon %moro rcoarimolhods monionan Secilg Jorhlure work", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/137", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 365.8995056152344, + "t": 351.3333435058594, + "r": 489.40869140625, + "b": 347.69952392578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 83 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "Inuhs sechon All Deseni seur8/ asoecis reles00 Perormanoe ouieci celec on DoxclayNet Simamtas In PLoLaynnt oyuato tnn qualmy cuthnlr crodictionsusiramnanavnna prncisicn (TTAP) wch IDovrdaos that rangn trom 0 5ta 005 (nap,o6-00: Ml olue Fnoula Cvurbar uvalaion coou piayIed DY Ihu COCO API/161 ook", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/138", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.1520690917969, + "t": 344.3362731933594, + "r": 527.7802124023438, + "b": 332.3333435058594, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 296 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "ptesenl baselne expenrnenls (Qvenin MAF) on Mas< R-CNN /121 Fasler F-CNN [11] an1 YOLOvS [13] Bou1 brann anavailang woropomormod AGa Imnoos vith dimonsions 1025 chxrols For tralring onN usodomannolatln Incaso ohcuunourfhunnolulco Dac3 Ohenn Vuruhoninptalunhamagny usnaroA en hn 10?7 loworrnannomap conoutec paicaisehuman anncrbons Aoo-amculeopnnos Ins Cves nacaton thatrhe DocLayNot daasci DOfo s mornwro clagnoo [csoarcncomrurt gap bctwoon human focogniticn and VL aporoaces nlelesuio IharNaska-CNNead Fasler GNincroova comnanen Maseoes nnocauna Ulbi AICBasodnanc scomrorubon oormvod Irom bounon)ooros Ooo{ abuin totcrorcochons Ontho chornnno Mcrocconi YolavSrmrodel does verywell und even Dul-Perdorins selectedlubels such Tedle undpcturl enbeh surcrisio Ta oloandPchre poincant amimemostasiaIN ishinsine documen: Ouau hnne", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/140", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 365.9697570800781, + "t": 317.6666564941406, + "r": 529.27099609375, + "b": 280.0965881347656, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 825 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "8", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/141", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 303.0059509277344, + "t": 48.90887451171875, + "r": 308.49029541015625, + "b": 39.960079193115234, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "KDD '22, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/142", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.67599487304688, + "t": 598.9852294921875, + "r": 346.2541809082031, + "b": 593.6693115234375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 130 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the tripleannotated pages, from which we obtain accuracy ranges. Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurr ence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges. B", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/143", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.52484130859375, + "t": 586.8209228515625, + "r": 525.9969482421875, + "b": 561.3492431640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 699 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "Figure 3: face. The laid te be drawn the respe", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/144", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.67599487304688, + "t": 347.296630859375, + "r": 108.26393127441406, + "b": 318.76702880859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 46 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "we distribute d the annotation workload and performed continuous quality contr ols. Phase one and two required a small team of experts only. For phases three and four, a group of 40 dedicated annotators were assembled and supervised. Phase 1: Data selection and preparation. Our inclusion cri-", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/145", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.50696563720703, + "t": 306.8683776855469, + "r": 212.13279724121094, + "b": 277.8305358886719, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 293 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "of pages ed by seerties. For cument figur es or object how", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/146", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 327.33526611328125, + "t": 415.4449157714844, + "r": 347.025390625, + "b": 375.5401916503906, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 58 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "d the colfealayout labels. Pageand $_{Title}$. class cificity ed for of the ambiguous, while coverage ensures that all meaningful items on a page can be annotated. We refrained from class labels that are very specific to a document category, such as Abstract in the Scientific Articles category. We also avoided class labels that are tightly linked to the semantics of the text. Labels such as Author and $_{Affiliation}$, as seen", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/147", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 223.4002227783203, + "t": 370.67547607421875, + "r": 347.0276794433594, + "b": 280.1531982421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 430 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "teria for documents were described in Section 3. A large effort went into ensuring that all documents are free to use. The data sources in DocBank, are often only distinguishable by discriminating on $^{3}$https://arxiv.org/ Figure 4: Table 1 from the DocLayNet paper in the original PDF (A), as rendered Markdown (B) and in JSON representation (C). Spanning table cells, such as the multi-column header \"triple interannotator mAP@0.5-0.95 (%)\", is repeated for each column in the Markdown representation (B), which guarantees that every data point can be traced back to row and column headings only by its grid coordinates in the table. In the JSON representation, the span information is reflected in the fields of each table cell (C).", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/148", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "paragraph", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.67599487304688, + "t": 281.1365966796875, + "r": 504.1103515625, + "b": 213.95611572265625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 737 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "9", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/149", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 302.54315185546875, + "t": 49.2738037109375, + "r": 308.49029541015625, + "b": 39.96010971069336, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + } + ] +} \ No newline at end of file diff --git a/test/data/chunker/0_out_chunks_heading_in_meta_with_extras.json b/test/data/chunker/0_out_chunks_heading_in_meta_with_extras.json deleted file mode 100644 index af629629..00000000 --- a/test/data/chunker/0_out_chunks_heading_in_meta_with_extras.json +++ /dev/null @@ -1,51 +0,0 @@ -{ - "root": [ - { - "path": "#/main-text/0", - "text": "This paragraph is marginally long enough for getting accepted as a chunk.", - "page": 1, - "bbox": [ - 0.0, - 1.0, - 2.0, - 3.0 - ] - }, - { - "path": "#/main-text/4", - "text": "This one should also include the subtitle above since it is long enough.", - "heading": "Some subtitle", - "page": 3, - "bbox": [ - 5.0, - 6.0, - 7.0, - 8.0 - ] - }, - { - "path": "#/tables/0", - "text": "Atomic Vision, Business = Website design. Atomic Vision, Country = United States. Delix Computer GmbH, Business = Computers and software. Delix Computer GmbH, Country = Germany", - "heading": "Acquisitions", - "page": 4, - "bbox": [ - 8.0, - 9.0, - 10.0, - 11.0 - ] - }, - { - "path": "#/main-text/8", - "text": "This paragraph is right before the list.\nSome first bullet content here.\nAnd then some second bullet content here.", - "heading": "Acquisitions", - "page": 4, - "bbox": [ - 8.0, - 9.0, - 10.0, - 11.0 - ] - } - ] -} diff --git a/test/data/chunker/0_out_chunks_heading_in_meta_wout_extras.json b/test/data/chunker/0_out_chunks_heading_in_meta_wout_extras.json deleted file mode 100644 index d45de944..00000000 --- a/test/data/chunker/0_out_chunks_heading_in_meta_wout_extras.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "root": [ - { - "path": "#/main-text/0", - "text": "This paragraph is marginally long enough for getting accepted as a chunk." - }, - { - "path": "#/main-text/4", - "text": "This one should also include the subtitle above since it is long enough.", - "heading": "Some subtitle" - }, - { - "path": "#/tables/0", - "text": "Atomic Vision, Business = Website design. Atomic Vision, Country = United States. Delix Computer GmbH, Business = Computers and software. Delix Computer GmbH, Country = Germany", - "heading": "Acquisitions" - }, - { - "path": "#/main-text/8", - "text": "This paragraph is right before the list.\nSome first bullet content here.\nAnd then some second bullet content here.", - "heading": "Acquisitions" - } - ] -} diff --git a/test/data/chunker/0_out_chunks_heading_in_text_with_extras.json b/test/data/chunker/0_out_chunks_heading_in_text_with_extras.json deleted file mode 100644 index 054bbc59..00000000 --- a/test/data/chunker/0_out_chunks_heading_in_text_with_extras.json +++ /dev/null @@ -1,59 +0,0 @@ -{ - "root": [ - { - "path": "#/main-text/0", - "text": "This paragraph is marginally long enough for getting accepted as a chunk.", - "page": 1, - "bbox": [ - 0.0, - 1.0, - 2.0, - 3.0 - ] - }, - { - "path": "#/main-text/4", - "text": "Some subtitle\nThis one should also include the subtitle above since it is long enough.", - "page": 3, - "bbox": [ - 5.0, - 6.0, - 7.0, - 8.0 - ] - }, - { - "path": "#/tables/0", - "text": "Acquisitions\nAtomic Vision, Business = Website design. Atomic Vision, Country = United States. Delix Computer GmbH, Business = Computers and software. Delix Computer GmbH, Country = Germany", - "page": 4, - "bbox": [ - 8.0, - 9.0, - 10.0, - 11.0 - ] - }, - { - "path": "#/main-text/7", - "text": "Acquisitions\nThis paragraph should actually include the latest subtitle.", - "page": 4, - "bbox": [ - 7.0, - 8.0, - 9.0, - 10.0 - ] - }, - { - "path": "#/main-text/8", - "text": "Acquisitions\nThis paragraph is right before the list.\nSome first bullet content here.\nAnd then some second bullet content here.", - "page": 4, - "bbox": [ - 8.0, - 9.0, - 10.0, - 11.0 - ] - } - ] -} diff --git a/test/data/chunker/0_out_chunks_heading_in_text_wout_extras.json b/test/data/chunker/0_out_chunks_heading_in_text_wout_extras.json deleted file mode 100644 index 994b19bb..00000000 --- a/test/data/chunker/0_out_chunks_heading_in_text_wout_extras.json +++ /dev/null @@ -1,24 +0,0 @@ -{ - "root": [ - { - "path": "#/main-text/0", - "text": "This paragraph is marginally long enough for getting accepted as a chunk." - }, - { - "path": "#/main-text/4", - "text": "Some subtitle\nThis one should also include the subtitle above since it is long enough." - }, - { - "path": "#/tables/0", - "text": "Acquisitions\nAtomic Vision, Business = Website design. Atomic Vision, Country = United States. Delix Computer GmbH, Business = Computers and software. Delix Computer GmbH, Country = Germany" - }, - { - "path": "#/main-text/7", - "text": "Acquisitions\nThis paragraph should actually include the latest subtitle." - }, - { - "path": "#/main-text/8", - "text": "Acquisitions\nThis paragraph is right before the list.\nSome first bullet content here.\nAnd then some second bullet content here." - } - ] -} diff --git a/test/data/chunker/1_out_chunks.json b/test/data/chunker/1_out_chunks.json new file mode 100644 index 00000000..005cd848 --- /dev/null +++ b/test/data/chunker/1_out_chunks.json @@ -0,0 +1,4048 @@ +{ + "root": [ + { + "text": "arXiv:2408.09869v3 [cs.CL] 30 Aug 2024", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_header", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 17.088111877441406, + "t": 583.2296752929688, + "r": 36.339778900146484, + "b": 231.99996948242188, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 38 + ] + } + ] + } + ] + } + }, + { + "text": "Version 1.0", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/2", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 282.772216796875, + "t": 512.7218017578125, + "r": 328.8624572753906, + "b": 503.340087890625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 11 + ] + } + ] + } + ], + "headings": [ + "Docling Technical Report" + ] + } + }, + { + "text": "Christoph Auer Maksym Lysak Ahmed Nassar Michele Dolfi Nikolaos Livathinos Panos Vagenas Cesar Berrospi Ramis Matteo Omenetti Fabian Lindlbauer Kasper Dinkla Lokesh Mishra Yusik Kim Shubham Gupta Rafael Teixeira de Lima Valery Weber Lucas Morin Ingmar Meijer Viktor Kuropiatnyk Peter W. J. Staar", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/3", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 113.4512939453125, + "t": 482.4101257324219, + "r": 498.396728515625, + "b": 439.45928955078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 295 + ] + } + ] + } + ], + "headings": [ + "Docling Technical Report" + ] + } + }, + { + "text": "AI4K Group, IBM Research Ruschlikon, Switzerland", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/4", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 248.43727111816406, + "t": 428.638427734375, + "r": 362.8905029296875, + "b": 407.99810791015625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 48 + ] + } + ] + } + ], + "headings": [ + "Docling Technical Report" + ] + } + }, + { + "text": "This technical report introduces Docling , an easy to use, self-contained, MITlicensed open-source package for PDF document conversion. It is powered by state-of-the-art specialized AI models for layout analysis (DocLayNet) and table structure recognition (TableFormer), and runs efficiently on commodity hardware in a small resource budget. The code interface allows for easy extensibility and addition of new features and models.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/6", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 142.92593383789062, + "t": 364.814697265625, + "r": 468.3847351074219, + "b": 300.651123046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 431 + ] + } + ] + } + ], + "headings": [ + "Docling Technical Report", + "Abstract" + ] + } + }, + { + "text": "Converting PDF documents back into a machine-processable format has been a major challenge for decades due to their huge variability in formats, weak standardization and printing-optimized characteristic, which discards most structural features and metadata. With the advent of LLMs and popular application patterns such as retrieval-augmented generation (RAG), leveraging the rich content embedded in PDFs has become ever more relevant. In the past decade, several powerful document understanding solutions have emerged on the market, most of which are commercial software, cloud offerings [3] and most recently, multi-modal vision-language models. As of today, only a handful of open-source tools cover PDF conversion, leaving a significant feature and quality gap to proprietary solutions.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/8", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 106.98738098144531, + "t": 240.2642822265625, + "r": 504.3785400390625, + "b": 142.53631591796875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 792 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "With Docling , we open-source a very capable and efficient document conversion tool which builds on the powerful, specialized AI models and datasets for layout analysis and table structure recognition we developed and presented in the recent past [12, 13, 9]. Docling is designed as a simple, self-contained python library with permissive license, running entirely locally on commodity hardware. Its code architecture allows for easy extensibility and addition of new features and models.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/9", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 107.0031967163086, + "t": 136.7283935546875, + "r": 504.04998779296875, + "b": 83.30133056640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 488 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "Docling Technical Report", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/10", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 107.10411071777344, + "t": 58.48394775390625, + "r": 200.8249969482422, + "b": 49.8505859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 24 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "1", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/11", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 1, + "bbox": { + "l": 303.50897216796875, + "t": 49.50579833984375, + "r": 308.4902648925781, + "b": 39.960147857666016, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "Here is what Docling delivers today:", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/12", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.48941802978516, + "t": 717.5628662109375, + "r": 253.97195434570312, + "b": 707.6951293945312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 36 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "\u00b7 Converts PDF documents to JSON or Markdown format, stable and lightning fast", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/13", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.6504669189453, + "t": 696.156494140625, + "r": 468.3969421386719, + "b": 686.3217163085938, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 78 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "\u00b7 Understands detailed page layout, reading order, locates figures and recovers table structures", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/14", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.72218322753906, + "t": 681.3009643554688, + "r": 504.0032653808594, + "b": 660.819091796875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 96 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "\u00b7 Extracts metadata from the document, such as title, authors, references and language", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/15", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.9065399169922, + "t": 655.3751220703125, + "r": 480.8502502441406, + "b": 645.7429809570312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 86 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "\u00b7 Optionally applies OCR, e.g. for scanned PDFs", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/16", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.8793487548828, + "t": 640.9143676757812, + "r": 333.46343994140625, + "b": 630.7002563476562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 47 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "\u00b7 Can be configured to be optimal for batch-mode (i.e high throughput, low time-to-solution) or interactive mode (compromise on efficiency, low time-to-solution)", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/17", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 135.0067901611328, + "t": 626.0984497070312, + "r": 504.003173828125, + "b": 604.8719482421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 161 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "\u00b7 Can leverage different accelerators (GPU, MPS, etc).", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/18", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 134.7841339111328, + "t": 600.127685546875, + "r": 355.41107177734375, + "b": 590.395751953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 54 + ] + } + ] + } + ], + "headings": [ + "1 Introduction" + ] + } + }, + { + "text": "To use Docling, you can simply install the docling package from PyPI. Documentation and examples are available in our GitHub repository at github.com/DS4SD/docling. All required model assets 1 are downloaded to a local huggingface datasets cache on first use, unless you choose to pre-install the model assets in advance.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/20", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.22560119628906, + "t": 548.7847900390625, + "r": 504.00341796875, + "b": 506.27606201171875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 321 + ] + } + ] + } + ], + "headings": [ + "2 Getting Started" + ] + } + }, + { + "text": "Docling provides an easy code interface to convert PDF documents from file system, URLs or binary streams, and retrieve the output in either JSON or Markdown format. For convenience, separate methods are offered to convert single documents or batches of documents. A basic usage example is illustrated below. Further examples are available in the Doclign code repository.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/21", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.38473510742188, + "t": 499.5434875488281, + "r": 504.0034484863281, + "b": 456.7132263183594, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 371 + ] + } + ] + } + ], + "headings": [ + "2 Getting Started" + ] + } + }, + { + "text": "from docling.document_converter import DocumentConverter source = \"https :// arxiv.org/pdf /2206.01062\" # PDF path or URL converter = DocumentConverter () result = converter.convert_single(source) print(result.render_as_markdown ()) # output: \"## DocLayNet: A Large Human -Annotated Dataset for Document -Layout Analysis [...]\"", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/22", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "code", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.45667266845703, + "t": 449.7299499511719, + "r": 491.58642578125, + "b": 380.3858642578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 327 + ] + } + ] + } + ], + "headings": [ + "2 Getting Started" + ] + } + }, + { + "text": "Optionally, you can configure custom pipeline features and runtime options, such as turning on or off features (e.g. OCR, table structure recognition), enforcing limits on the input document size, and defining the budget of CPU threads. Advanced usage examples and options are documented in the README file. Docling also provides a Dockerfile to demonstrate how to install and run it inside a container.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/23", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.32361602783203, + "t": 368.8786926269531, + "r": 504.3451843261719, + "b": 315.56304931640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 403 + ] + } + ] + } + ], + "headings": [ + "2 Getting Started" + ] + } + }, + { + "text": "Docling implements a linear pipeline of operations, which execute sequentially on each given document (see Fig. 1). Each document is first parsed by a PDF backend, which retrieves the programmatic text tokens, consisting of string content and its coordinates on the page, and also renders a bitmap image of each page to support downstream operations. Then, the standard model pipeline applies a sequence of AI models independently on every page in the document to extract features and content, such as layout and table structures. Finally, the results from all pages are aggregated and passed through a post-processing stage, which augments metadata, detects the document language, infers reading-order and eventually assembles a typed document object which can be serialized to JSON or Markdown.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/25", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.176025390625, + "t": 273.72723388671875, + "r": 504.06005859375, + "b": 176.83807373046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 796 + ] + } + ] + } + ], + "headings": [ + "3 Processing pipeline" + ] + } + }, + { + "text": "Two basic requirements to process PDF documents in our pipeline are a) to retrieve all text content and their geometric coordinates on each page and b) to render the visual representation of each page as it would appear in a PDF viewer. Both these requirements are encapsulated in Docling's PDF backend interface. While there are several open-source PDF parsing libraries available for python, we faced major obstacles with all of them for different reasons, among which were restrictive", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/27", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 107.26972198486328, + "t": 142.07904052734375, + "r": 504.2434997558594, + "b": 87.39227294921875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 487 + ] + } + ] + } + ], + "headings": [ + "3.1 PDF backends" + ] + } + }, + { + "text": "$^{1}$see huggingface.co/ds4sd/docling-models/", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/28", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "footnote", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 120.65299987792969, + "t": 78.96942138671875, + "r": 276.9403076171875, + "b": 69.9141845703125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 46 + ] + } + ] + } + ], + "headings": [ + "3.1 PDF backends" + ] + } + }, + { + "text": "2", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/29", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 2, + "bbox": { + "l": 302.96832275390625, + "t": 49.7403564453125, + "r": 308.49029541015625, + "b": 39.960079193115234, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "3.1 PDF backends" + ] + } + }, + { + "text": "Figure 1: Sketch of Docling's default processing pipeline. The inner part of the model pipeline is easily customizable and extensible.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/30", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.11122131347656, + "t": 570.7063598632812, + "r": 504.00335693359375, + "b": 550.3002319335938, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 134 + ] + } + ] + } + ], + "headings": [ + "3.1 PDF backends" + ] + } + }, + { + "text": "licensing (e.g. pymupdf [7]), poor speed or unrecoverable quality issues, such as merged text cells across far-apart text tokens or table columns (pypdfium, PyPDF) [15, 14].", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/31", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.37481689453125, + "t": 525.6080932617188, + "r": 504.0033264160156, + "b": 504.8570861816406, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 173 + ] + } + ] + } + ], + "headings": [ + "3.1 PDF backends" + ] + } + }, + { + "text": "We therefore decided to provide multiple backend choices, and additionally open-source a custombuilt PDF parser, which is based on the low-level qpdf [4] library. It is made available in a separate package named docling-parse and powers the default PDF backend in Docling. As an alternative, we provide a PDF backend relying on pypdfium , which may be a safe backup choice in certain cases, e.g. if issues are seen with particular font encodings.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/32", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.10971069335938, + "t": 498.21685791015625, + "r": 504.0033874511719, + "b": 443.9909973144531, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 446 + ] + } + ] + } + ], + "headings": [ + "3.1 PDF backends" + ] + } + }, + { + "text": "As part of Docling, we initially release two highly capable AI models to the open-source community, which have been developed and published recently by our team. The first model is a layout analysis model, an accurate object-detector for page elements [13]. The second model is TableFormer [12, 9], a state-of-the-art table structure recognition model. We provide the pre-trained weights (hosted on huggingface) and a separate package for the inference code as docling-ibm-models . Both models are also powering the open-access deepsearch-experience, our cloud-native service for knowledge exploration tasks.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/34", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.07593536376953, + "t": 406.1695251464844, + "r": 504.1148681640625, + "b": 330.2677307128906, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 608 + ] + } + ] + } + ], + "headings": [ + "3.2 AI models" + ] + } + }, + { + "text": "Our layout analysis model is an object-detector which predicts the bounding-boxes and classes of various elements on the image of a given page. Its architecture is derived from RT-DETR [16] and re-trained on DocLayNet [13], our popular human-annotated dataset for document-layout analysis, among other proprietary datasets. For inference, our implementation relies on the onnxruntime [5].", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/36", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.1727294921875, + "t": 294.7471923828125, + "r": 504.1613464355469, + "b": 251.51837158203125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 388 + ] + } + ] + } + ], + "headings": [ + "Layout Analysis Model" + ] + } + }, + { + "text": "The Docling pipeline feeds page images at 72 dpi resolution, which can be processed on a single CPU with sub-second latency. All predicted bounding-box proposals for document elements are post-processed to remove overlapping proposals based on confidence and size, and then intersected with the text tokens in the PDF to group them into meaningful and complete units such as paragraphs, section titles, list items, captions, figures or tables.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/37", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.23725891113281, + "t": 245.4161376953125, + "r": 504.00347900390625, + "b": 191.62884521484375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 443 + ] + } + ] + } + ], + "headings": [ + "Layout Analysis Model" + ] + } + }, + { + "text": "The TableFormer model [12], first published in 2022 and since refined with a custom structure token language [9], is a vision-transformer model for table structure recovery. It can predict the logical row and column structure of a given table based on an input image, and determine which table cells belong to column headers, row headers or the table body. Compared to earlier approaches, TableFormer handles many characteristics of tables, such as partial or no borderlines, empty cells, rows or columns, cell spans and hierarchy both on column-heading or row-heading level, tables with inconsistent indentation or alignment and other complexities. For inference, our implementation relies on PyTorch [2].", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/39", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 107.22769927978516, + "t": 156.10821533203125, + "r": 504.01800537109375, + "b": 69.84173583984375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 706 + ] + } + ] + } + ], + "headings": [ + "Table Structure Recognition" + ] + } + }, + { + "text": "3", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/40", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 3, + "bbox": { + "l": 302.7810974121094, + "t": 49.40008544921875, + "r": 308.4903259277344, + "b": 39.96010971069336, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "Table Structure Recognition" + ] + } + }, + { + "text": "The Docling pipeline feeds all table objects detected in the layout analysis to the TableFormer model, by providing an image-crop of the table and the included text cells. TableFormer structure predictions are matched back to the PDF cells in post-processing to avoid expensive re-transcription text in the table image. Typical tables require between 2 and 6 seconds to be processed on a standard CPU, strongly depending on the amount of included table cells.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/41", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.23402404785156, + "t": 717.677001953125, + "r": 504.0035095214844, + "b": 664.2490844726562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 459 + ] + } + ] + } + ], + "headings": [ + "Table Structure Recognition" + ] + } + }, + { + "text": "Docling provides optional support for OCR, for example to cover scanned PDFs or content in bitmaps images embedded on a page. In our initial release, we rely on EasyOCR [1], a popular thirdparty OCR library with support for many languages. Docling, by default, feeds a high-resolution page image (216 dpi) to the OCR engine, to allow capturing small print detail in decent quality. While EasyOCR delivers reasonable transcription quality, we observe that it runs fairly slow on CPU (upwards of 30 seconds per page).", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/43", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.0999526977539, + "t": 632.9981689453125, + "r": 504.00347900390625, + "b": 568.0103759765625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 515 + ] + } + ] + } + ], + "headings": [ + "OCR" + ] + } + }, + { + "text": "We are actively seeking collaboration from the open-source community to extend Docling with additional OCR backends and speed improvements.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/44", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.48332214355469, + "t": 561.5487670898438, + "r": 504.0033874511719, + "b": 540.876953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 139 + ] + } + ] + } + ], + "headings": [ + "OCR" + ] + } + }, + { + "text": "In the final pipeline stage, Docling assembles all prediction results produced on each page into a well-defined datatype that encapsulates a converted document, as defined in the auxiliary package docling-core . The generated document object is passed through a post-processing model which leverages several algorithms to augment features, such as detection of the document language, correcting the reading order, matching figures with captions and labelling metadata such as title, authors and references. The final output can then be serialized to JSON or transformed into a Markdown representation at the users request.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/46", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.259033203125, + "t": 506.85528564453125, + "r": 504.2517395019531, + "b": 431.21771240234375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 622 + ] + } + ] + } + ], + "headings": [ + "3.3 Assembly" + ] + } + }, + { + "text": "Docling provides a straight-forward interface to extend its capabilities, namely the model pipeline. A model pipeline constitutes the central part in the processing, following initial document parsing and preceding output assembly, and can be fully customized by sub-classing from an abstract baseclass ( BaseModelPipeline ) or cloning the default model pipeline. This effectively allows to fully customize the chain of models, add or replace models, and introduce additional pipeline configuration parameters. To use a custom model pipeline, the custom pipeline class to instantiate can be provided as an argument to the main document conversion methods. We invite everyone in the community to propose additional or alternative models and improvements.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/48", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.01625061035156, + "t": 397.58544921875, + "r": 504.00347900390625, + "b": 311.05523681640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 753 + ] + } + ] + } + ], + "headings": [ + "3.4 Extensibility" + ] + } + }, + { + "text": "Implementations of model classes must satisfy the python Callable interface. The __call__ method must accept an iterator over page objects, and produce another iterator over the page objects which were augmented with the additional features predicted by the model, by extending the provided PagePredictions data model accordingly.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/49", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 106.94336700439453, + "t": 304.5326232910156, + "r": 504.0707092285156, + "b": 262.160400390625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 330 + ] + } + ] + } + ], + "headings": [ + "3.4 Extensibility" + ] + } + }, + { + "text": "In this section, we establish some reference numbers for the processing speed of Docling and the resource budget it requires. All tests in this section are run with default options on our standard test set distributed with Docling, which consists of three papers from arXiv and two IBM Redbooks, with a total of 225 pages. Measurements were taken using both available PDF backends on two different hardware systems: one MacBook Pro M3 Max, and one bare-metal server running Ubuntu 20.04 LTS on an Intel Xeon E5-2690 CPU. For reproducibility, we fixed the thread budget (through setting OMP NUM THREADS environment variable ) once to 4 (Docling default) and once to 16 (equal to full core count on the test hardware). All results are shown in Table 1.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/51", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.0430679321289, + "t": 221.5301513671875, + "r": 504.22869873046875, + "b": 135.16595458984375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 750 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ] + } + }, + { + "text": "If you need to run Docling in very low-resource environments, please consider configuring the pypdfium backend. While it is faster and more memory efficient than the default docling-parse backend, it will come at the expense of worse quality results, especially in table structure recovery.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/52", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.19568634033203, + "t": 128.8489990234375, + "r": 504.0033874511719, + "b": 96.76458740234375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 290 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ] + } + }, + { + "text": "Establishing GPU acceleration support for the AI models is currently work-in-progress and largely untested, but may work implicitly when CUDA is available and discovered by the onnxruntime and", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/53", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 107.47733306884766, + "t": 90.18896484375, + "r": 504.123046875, + "b": 69.5284423828125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 192 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ] + } + }, + { + "text": "4", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/54", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 4, + "bbox": { + "l": 302.41058349609375, + "t": 49.65472412109375, + "r": 308.49029541015625, + "b": 39.960079193115234, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ] + } + }, + { + "text": "torch runtimes backing the Docling pipeline. We will deliver updates on this topic at in a future version of this report.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/55", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.42681121826172, + "t": 717.5958862304688, + "r": 504.0035400390625, + "b": 696.97607421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 121 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ] + } + }, + { + "text": "Table 1: Runtime characteristics of Docling with the standard model pipeline and settings, on our test dataset of 225 pages, on two different systems. OCR is disabled. We show the time-to-solution (TTS), computed throughput in pages per second, and the peak memory used (resident set size) for both the Docling-native PDF backend and for the pypdfium backend, using 4 and 16 threads.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/56", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.0246810913086, + "t": 686.1126708984375, + "r": 504.30712890625, + "b": 643.7755126953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 383 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ] + } + }, + { + "text": "Thanks to the high-quality, richly structured document conversion achieved by Docling, its output qualifies for numerous downstream applications. For example, Docling can provide a base for detailed enterprise document search, passage retrieval or classification use-cases, or support knowledge extraction pipelines, allowing specific treatment of different structures in the document, such as tables, figures, section structure or references. For popular generative AI application patterns, such as retrieval-augmented generation (RAG), we provide quackling , an open-source package which capitalizes on Docling's feature-rich document output to enable document-native optimized vector embedding and chunking. It plugs in seamlessly with LLM frameworks such as LlamaIndex [8]. Since Docling is fast, stable and cheap to run, it also makes for an excellent choice to build document-derived datasets. With its powerful table structure recognition, it provides significant benefit to automated knowledge-base construction [11, 10]. Docling is also integrated within the open IBM data prep kit [6], which implements scalable data transforms to build large-scale multi-modal training datasets.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/58", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.10533142089844, + "t": 504.97296142578125, + "r": 504.0229187011719, + "b": 364.4931335449219, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1189 + ] + } + ] + } + ], + "headings": [ + "5 Applications" + ] + } + }, + { + "text": "Docling is designed to allow easy extension of the model library and pipelines. In the future, we plan to extend Docling with several more models, such as a figure-classifier model, an equationrecognition model, a code-recognition model and more. This will help improve the quality of conversion for specific types of content, as well as augment extracted document metadata with additional information. Further investment into testing and optimizing GPU acceleration as well as improving the Docling-native PDF backend are on our roadmap, too.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/60", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 106.92281341552734, + "t": 323.5386657714844, + "r": 504.00347900390625, + "b": 258.76641845703125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 543 + ] + } + ] + } + ], + "headings": [ + "6 Future work and contributions" + ] + } + }, + { + "text": "We encourage everyone to propose or implement additional features and models, and will gladly take your inputs and contributions under review . The codebase of Docling is open for use and contribution, under the MIT license agreement and in alignment with our contributing guidelines included in the Docling repository. If you use Docling in your projects, please consider citing this technical report.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/61", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 107.04397583007812, + "t": 252.4183349609375, + "r": 504.0430908203125, + "b": 198.77685546875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 402 + ] + } + ] + } + ], + "headings": [ + "6 Future work and contributions" + ] + } + }, + { + "text": "[1] J. AI. Easyocr: Ready-to-use ocr with 80+ supported languages. https://github.com/ JaidedAI/EasyOCR , 2024. Version: 1.7.0.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/63", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 112.33451843261719, + "t": 163.731201171875, + "r": 504.0009460449219, + "b": 142.08197021484375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 127 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[2] J. Ansel, E. Yang, H. He, N. Gimelshein, A. Jain, M. Voznesensky, B. Bao, P. Bell, D. Berard, E. Burovski, G. Chauhan, A. Chourdia, W. Constable, A. Desmaison, Z. DeVito, E. Ellison, W. Feng, J. Gong, M. Gschwind, B. Hirsh, S. Huang, K. Kalambarkar, L. Kirsch, M. Lazos, M. Lezcano, Y. Liang, J. Liang, Y. Lu, C. Luk, B. Maher, Y. Pan, C. Puhrsch, M. Reso, M. Saroufim, M. Y. Siraichi, H. Suk, M. Suo, P. Tillet, E. Wang, X. Wang, W. Wen, S. Zhang, X. Zhao, K. Zhou, R. Zou, A. Mathews, G. Chanan, P. Wu, and S. Chintala. Pytorch 2: Faster", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/64", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 112.45421600341797, + "t": 134.16204833984375, + "r": 504.0035095214844, + "b": 69.84818267822266, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 543 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "5", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/65", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 302.7286376953125, + "t": 49.4200439453125, + "r": 308.49029541015625, + "b": 39.96018600463867, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "machine learning through dynamic python bytecode transformation and graph compilation. In Proceedings of the 29th ACM International Conference on Architectural Support for Programming Languages and Operating Systems, Volume 2 (ASPLOS '24) . ACM, 4 2024. doi: 10.1145/3620665.3640366. URL https://pytorch.org/assets/pytorch2-2.pdf .", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/66", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 129.0050048828125, + "t": 717.4641723632812, + "r": 504.0033264160156, + "b": 674.812744140625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 331 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[3] C. Auer, M. Dolfi, A. Carvalho, C. B. Ramis, and P. W. Staar. Delivering document conversion as a cloud service with high throughput and responsiveness. In 2022 IEEE 15th International Conference on Cloud Computing (CLOUD) , pages 363-373. IEEE, 2022.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/67", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.47968292236328, + "t": 665.970458984375, + "r": 504.3585510253906, + "b": 634.421630859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 255 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[4] J. Berkenbilt. Qpdf: A content-preserving pdf document transformer, 2024. URL https: //github.com/qpdf/qpdf .", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/68", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.59274291992188, + "t": 625.3558349609375, + "r": 504.00018310546875, + "b": 603.854736328125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 113 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[5] O. R. developers. Onnx runtime. https://onnxruntime.ai/ , 2024. Version: 1.18.1.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/69", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.65106964111328, + "t": 595.5201416015625, + "r": 478.88665771484375, + "b": 585.318359375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 84 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[6] IBM. Data Prep Kit: a community project to democratize and accelerate unstructured data preparation for LLM app developers, 2024. URL https://github.com/IBM/ data-prep-kit .", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/70", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.5077896118164, + "t": 576.7722778320312, + "r": 504.0283508300781, + "b": 544.3335571289062, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 177 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[7] A. S. Inc. PyMuPDF, 2024. URL https://github.com/pymupdf/PyMuPDF .", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/71", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.71062469482422, + "t": 536.3712768554688, + "r": 447.4246826171875, + "b": 526.034423828125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 70 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[8] J. Liu. LlamaIndex, 11 2022. URL https://github.com/jerryjliu/llama_index .", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/72", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.72732543945312, + "t": 516.6817016601562, + "r": 483.91107177734375, + "b": 506.7769470214844, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 79 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[9] M. Lysak, A. Nassar, N. Livathinos, C. Auer, and P. Staar. Optimized Table Tokenization for Table Structure Recognition. In Document Analysis and Recognition - ICDAR 2023: 17th International Conference, San Jos'e, CA, USA, August 21-26, 2023, Proceedings, Part II , pages 37-50, Berlin, Heidelberg, Aug. 2023. Springer-Verlag. ISBN 978-3-031-41678-1. doi: 10. 1007/978-3-031-41679-8 3. URL https://doi.org/10.1007/978-3-031-41679-8_3 .", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/73", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 112.50459289550781, + "t": 498.0171203613281, + "r": 504.004638671875, + "b": 444.5917053222656, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 439 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[10] L. Mishra, S. Dhibi, Y. Kim, C. Berrospi Ramis, S. Gupta, M. Dolfi, and P. Staar. Statements: Universal information extraction from tables with large language models for ESG KPIs. In D. Stammbach, J. Ni, T. Schimanski, K. Dutia, A. Singh, J. Bingler, C. Christiaen, N. Kushwaha, V. Muccione, S. A. Vaghefi, and M. Leippold, editors, Proceedings of the 1st Workshop on Natural Language Processing Meets Climate Change (ClimateNLP 2024) , pages 193-214, Bangkok, Thailand, Aug. 2024. Association for Computational Linguistics. URL https://aclanthology.org/2024.climatenlp-1.15 .", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/74", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.49420928955078, + "t": 435.72955322265625, + "r": 504.1082458496094, + "b": 359.86444091796875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 581 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[11] L. Morin, V. Weber, G. I. Meijer, F. Yu, and P. W. J. Staar. Patcid: an open-access dataset of chemical structures in patent documents. Nature Communications , 15(1):6532, August 2024. ISSN 2041-1723. doi: 10.1038/s41467-024-50779-y. URL https://doi.org/10.1038/ s41467-024-50779-y .", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/75", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.34581756591797, + "t": 351.3507995605469, + "r": 504.6417541503906, + "b": 308.78851318359375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 288 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[12] A. Nassar, N. Livathinos, M. Lysak, and P. Staar. Tableformer: Table structure understanding with transformers. In Proceedings of the IEEE/CVF Conference on Computer Vision and Pattern Recognition , pages 4614-4623, 2022.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/76", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.38827514648438, + "t": 299.4344177246094, + "r": 504.3544616699219, + "b": 268.1841125488281, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 226 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[13] B. Pfitzmann, C. Auer, M. Dolfi, A. S. Nassar, and P. Staar. Doclaynet: a large humanannotated dataset for document-layout segmentation. pages 3743-3751, 2022.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/77", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.36676788330078, + "t": 258.790283203125, + "r": 504.00341796875, + "b": 238.3961181640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 164 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[14] pypdf Maintainers. pypdf: A Pure-Python PDF Library, 2024. URL https://github.com/ py-pdf/pypdf .", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/78", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.28363800048828, + "t": 229.4072265625, + "r": 504.00091552734375, + "b": 207.166748046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 102 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[15] P. Team. PyPDFium2: Python bindings for PDFium, 2024. URL https://github.com/ pypdfium2-team/pypdfium2 .", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/79", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.2214584350586, + "t": 199.6893310546875, + "r": 504.0008850097656, + "b": 177.491455078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 109 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "[16] Y. Zhao, W. Lv, S. Xu, J. Wei, G. Wang, Q. Dang, Y. Liu, and J. Chen. Detrs beat yolos on real-time object detection, 2023.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/80", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "list_item", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 107.28424072265625, + "t": 169.70806884765625, + "r": 504.0033264160156, + "b": 148.91436767578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 128 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "6", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/81", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 6, + "bbox": { + "l": 302.7389221191406, + "t": 49.36236572265625, + "r": 308.5960998535156, + "b": 39.96012496948242, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "References" + ] + } + }, + { + "text": "In this section, we illustrate a few examples of Docling' s output in Markdown and JSON.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/83", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 107.6931381225586, + "t": 694.013671875, + "r": 463.7545471191406, + "b": 684.3182373046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 88 + ] + } + ] + } + ], + "headings": [ + "Appendix" + ] + } + }, + { + "text": "Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/85", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 138.0285186767578, + "t": 650.9168701171875, + "r": 176.45944213867188, + "b": 631.6739501953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 73 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/86", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 199.22952270507812, + "t": 650.9168701171875, + "r": 237.34890747070312, + "b": 631.6729125976562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 71 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/87", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 260.11895751953125, + "t": 650.9168701171875, + "r": 298.3296203613281, + "b": 631.549072265625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 70 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/88", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 168.40359497070312, + "t": 629.259521484375, + "r": 206.98048400878906, + "b": 609.97509765625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 72 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/89", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 229.48968505859375, + "t": 629.259521484375, + "r": 267.6090393066406, + "b": 610.0166015625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 68 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large ground-truth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present $_{DocLayNet}$, a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/91", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 123.66893768310547, + "t": 602.5093994140625, + "r": 214.2318878173828, + "b": 500.3504333496094, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1599 + ] + } + ] + } + ], + "headings": [ + "ABSTRACT" + ] + } + }, + { + "text": "\u00b7 Information systems \u2192 Document structure ; \u00b7 Applied computing \u2192 Document analysis ; \u00b7 Computing methodologies \u2192 Machine learning ; Computer vision ; $_{Object detection}$;", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/93", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 124.05064392089844, + "t": 490.005126953125, + "r": 215.08236694335938, + "b": 476.94268798828125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 174 + ] + } + ] + } + ], + "headings": [ + "CCS CONCEPTS" + ] + } + }, + { + "text": "Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profi t or commercial advantage and that copies bear this notice and the full citation on thefirst page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s). KDD '22, August 14-18, 2022, Washington, DC, USA \u00a9 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08. https://doi.org/10.1145/3534678.3539043", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/94", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 123.8716049194336, + "t": 464.7064514160156, + "r": 214.06785583496094, + "b": 436.57623291015625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 566 + ] + } + ] + } + ], + "headings": [ + "CCS CONCEPTS" + ] + } + }, + { + "text": "Birgit Pfitzmann IBM Research Rueschlikon, Switzerland bpf@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/96", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.2007141113281, + "t": 657.4287109375, + "r": 433.130126953125, + "b": 653.031005859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 73 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Christoph Auer IBM Research Rueschlikon, Switzerland cau@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/97", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.6015930175781, + "t": 648.9207153320312, + "r": 432.7991943359375, + "b": 645.91748046875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 71 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Michele Dolfi IBM Research Rueschlikon, Switzerland dol@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/98", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.18927001953125, + "t": 641.90869140625, + "r": 429.5950012207031, + "b": 637.8482666015625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 70 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Ahmed S. Nassar IBM Research Rueschlikon, Switzerland ahn@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/99", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.2640075683594, + "t": 633.8328857421875, + "r": 436.4726867675781, + "b": 629.6668090820312, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 72 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Peter Staar IBM Research Rueschlikon, Switzerland taa@zurich.ibm.com", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/100", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.2624206542969, + "t": 625.7568359375, + "r": 427.5014953613281, + "b": 621.548583984375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 68 + ] + } + ] + } + ], + "headings": [ + "DocLayNet: A Large Human-Annotated Dataset for Document-Layout Analysis" + ] + } + }, + { + "text": "Accurate document layout analysis is a key requirement for highquality PDF document conversion. With the recent availability of public, large groundtruth datasets such as PubLayNet and DocBank, deep-learning models have proven to be very effective at layout detection and segmentation. While these datasets are of adequate size to train such models, they severely lack in layout variability since they are sourced from scientific article repositories such as PubMed and arXiv only. Consequently, the accuracy of the layout segmentation drops significantly when these models are applied on more challenging and diverse layouts. In this paper, we present DocLayNet , a new, publicly available, document-layout annotation dataset in COCO format. It contains 80863 manually annotated pages from diverse data sources to represent a wide variability in layouts. For each PDF page, the layout annotations provide labelled bounding-boxes with a choice of 11 distinct classes. DocLayNet also provides a subset of double- and triple-annotated pages to determine the inter-annotator agreement. In multiple experiments, we provide baseline accuracy scores (in mAP) for a set of popular object detection models. We also demonstrate that these models fall approximately 10% behind the inter-annotator agreement. Furthermore, we provide evidence that DocLayNet is of sufficient size. Lastly, we compare models trained on PubLayNet, DocBank and DocLayNet, showing that layout predictions of the DocLayNettrained models are more robust and thus the preferred choice for general-purpose document-layout analysis.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/102", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 328.861083984375, + "t": 604.5524291992188, + "r": 528.3615112304688, + "b": 549.0685424804688, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1594 + ] + } + ] + } + ], + "headings": [ + "ABSTRACT" + ] + } + }, + { + "text": "$_{\u00b7 Information systems }$\u2192$_{ Document structure ; \u00b7 Applied computing }$ \u2192$_{ Document analysis ; \u00b7 Computing methodologies }$\u2192$_{ Machine learning ;}$ Computer vision ; Object detection ;", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/104", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.4852600097656, + "t": 532.8919067382812, + "r": 516.2509155273438, + "b": 523.6624755859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 191 + ] + } + ] + } + ], + "headings": [ + "CCS CONCEPTS" + ] + } + }, + { + "text": "Permission to make digital or hard copies of part or all of this work for personal or classroom use is granted without fee provided that copies are not made or distributed for profit or commercial advantage and that copies bear this notice and the full citation on the first page. Copyrights for third-party components of this work must be honored. For all other uses, contact the owner/author(s).", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/105", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.1643371582031, + "t": 519.994873046875, + "r": 527.3062133789062, + "b": 506.2882080078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 397 + ] + } + ] + } + ], + "headings": [ + "CCS CONCEPTS" + ] + } + }, + { + "text": "KDD '22, August 14-18, 2022, Washington, DC, USA \u00a9 2022 Copyright held by the owner/author(s). ACM ISBN 978-1-4503-9385-0/22/08. https://doi.org/10.1145/3534678.3539043", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/106", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.1140441894531, + "t": 502.5775146484375, + "r": 513.2442016601562, + "b": 493.3287353515625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 168 + ] + } + ] + } + ], + "headings": [ + "CCS CONCEPTS" + ] + } + }, + { + "text": "Figure 1: Four examples of complex page layouts across different document categories", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/107", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.0572509765625, + "t": 490.3890686035156, + "r": 445.8473205566406, + "b": 486.1141662597656, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 84 + ] + } + ] + } + ], + "headings": [ + "CCS CONCEPTS" + ] + } + }, + { + "text": "PDF document conversion, layout segmentation, object-detection, data set, Machine Learning", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/109", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.092529296875, + "t": 469.5487365722656, + "r": 454.5943603515625, + "b": 465.4438781738281, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 90 + ] + } + ] + } + ], + "headings": [ + "KEYWORDS" + ] + } + }, + { + "text": "Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Washington, DC, USA. ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/ 3534678.3539043", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/111", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 328.9222412109375, + "t": 448.7705383300781, + "r": 528.159423828125, + "b": 435.41400146484375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 374 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "Figure 1: Four examples of complex page layouts across different document categories", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/112", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 221.84927368164062, + "t": 499.2803955078125, + "r": 312.25115966796875, + "b": 490.75177001953125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 84 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "PDF document conversion, layout segmentation, object-detection, data set, Machine Learning", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/114", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 222.00753784179688, + "t": 474.62298583984375, + "r": 312.0212097167969, + "b": 465.4729919433594, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 90 + ] + } + ] + } + ], + "headings": [ + "KEYWORDS" + ] + } + }, + { + "text": "Birgit Pfitzmann, Christoph Auer, Michele Dolfi , Ahmed S. Nassar, and Peter Staar. 2022. DocLayNet: A Large Human-Annotated Dataset for DocumentLayout Analysis. In Proceedings of the 28th ACM SIGKDD Conference on Knowledge Discovery and Data Mining (KDD '22), August 14-18, 2022, Wash-$_{ington, DC, USA.}$ACM, New York, NY, USA, 9 pages. https://doi.org/10.1145/ 3534678.3539043", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/116", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 221.68344116210938, + "t": 458.718994140625, + "r": 312.1560974121094, + "b": 436.15557861328125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 380 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "1 INTRODUCTION", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/117", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 329.6015930175781, + "t": 428.9794921875, + "r": 373.37646484375, + "b": 423.8311462402344, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 14 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "Despite the substantial improvements achieved with machine-learning (ML) approaches and deep neural networks in recent years, document conversion remains a challenging problem, as demonstrated by the numerous public competitions held on this topic [1-4]. The challenge originates from the huge variability in PDF documents regarding layout, language and formats (scanned, programmatic or a combination of both). Engineering a single ML model that can be applied on all types of documents and provides high-quality layout segmentation remains to this day extremely challenging [5]. To highlight the variability in document layouts, we show a few example documents from the DocLayNet dataset in Figure 1. Figure 2: Title page of the DocLayNet paper (arxiv .org/pdf/2206.01062) - left PDF, right rendered Markdown. If recognized, metadata such as authors are appearing first under the title. Text content inside figures is currently dropped, the caption is retained and linked to the figure in the JSON representation (not shown).", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/118", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 107.31889343261719, + "t": 420.2637939453125, + "r": 527.5916137695312, + "b": 377.62860107421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1027 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "7", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/119", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 302.8258056640625, + "t": 49.2652587890625, + "r": 308.49029541015625, + "b": 39.960079193115234, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "arXiv:2206.01062v1 [cs.CV] 2 Jun 2022", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/120", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_header", + "prov": [ + { + "page_no": 7, + "bbox": { + "l": 110.2352066040039, + "t": 618.2011108398438, + "r": 118.32157135009766, + "b": 492.749267578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 37 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "KDD '22, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/121", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 122.18534851074219, + "t": 563.207763671875, + "r": 338.8071594238281, + "b": 558.6549682617188, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 130 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/122", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 122.11329650878906, + "t": 552.1026611328125, + "r": 226.37594604492188, + "b": 509.48504638671875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 489 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "to avoid this at any cost in order to have clear, unbiased baseline numbers for human document-layout annotation. Third, we introduced the feature of snapping boxes around text segments to obtain a pixel-accurate annotation and again reduce time and effort. The CCS annotation tool automatically shrinks every user-drawn box to the minimum bounding-box around the enclosed text-cells for all purely text-based segments, which excludes only Table and $_{Picture}$. For the latter, we instructed annotation staffto minimise inclusion of surrounding whitespace while including all graphical lines. A downside of snapping boxes to enclosed text cells is that some wrongly parsed PDF pages cannot be annotated correctly and need to be skipped. Fourth, we established a way toflag pages as rejected for cases where no valid annotation according to the label guidelines could be achieved. Example cases for this would be PDF pages that render incorrectly or contain layouts that are impossible to capture with non-overlapping rectangles. Such rejected pages are not contained in thefinal dataset. With all these measures in place, experienced annotation staffmanaged to annotate a single page in a typical timeframe of 20s to 60s, depending on its complexity.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/123", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 121.85212707519531, + "t": 431.1610107421875, + "r": 226.33633422851562, + "b": 341.54669189453125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1252 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ] + } + }, + { + "text": "The primary goal of DocLayNet is to obtain high-quality ML models capable of accurate document-layout analysis on a wide variety of challenging layouts. As discussed in Section 2, object detection models are currently the easiest to use, due to the standardisation of ground-truth data in COCO format [16] and the availability of general frameworks such as detectron2 [17]. Furthermore, baseline numbers in PubLayNet and DocBank were obtained using standard object detection models such as Mask R-CNN and Faster R-CNN. As such, we will relate to these object detection methods in this", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/125", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 122.00563049316406, + "t": 327.5806884765625, + "r": 226.2816162109375, + "b": 284.8097229003906, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 584 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "Figure 3: Page 6 of the DocLayNet paper. If recognized, metadata such as authors are appearing first under the title. Elements recognized as page headers or footers are suppressed in Markdown to deliver uninterrupted content in reading order. Tables are inserted in reading order. The paragraph in \"5. Experiments\" wrapping over the column end is broken up in two and interrupted by the table.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/126", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 107.26910400390625, + "t": 267.0020751953125, + "r": 504.2988586425781, + "b": 224.93768310546875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 393 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "Figure 5: Prediction performance (mAP@0.5-0.95) of a Mask R-CNN network with ResNet50 backbone trained on increasing fractions of the DocLayNet dataset. The learning curv eflattens around the 80% mark, indicating that increasing the size of the DocLayNet dataset with similar data will not yield significantly better predictions.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/127", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 235.38954162597656, + "t": 469.9726867675781, + "r": 339.28778076171875, + "b": 441.4075927734375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 329 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "paper and leave the detailed evaluation of more recent methods mentioned in Section 2 for future work.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/128", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 234.98081970214844, + "t": 425.5683898925781, + "r": 338.644775390625, + "b": 415.5873718261719, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 102 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "In this section, we will present several aspects related to the performance of object detection models on DocLayNet. Similarly as in PubLayNet, we will evaluate the quality of their predictions using mean average precision (mAP) with 10 overlaps that range from 0.5 to 0.95 in steps of 0.05 (mAP@0.5-0.95). These scores are computed by leveraging the evaluation code provided by the COCO API [16].", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/129", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 234.98487854003906, + "t": 416.19970703125, + "r": 338.76287841796875, + "b": 382.79742431640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 397 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "In Table 2, we present baseline experiments (given in mAP) on Mask R-CNN [12], Faster R-CNN [11], and YOLOv5 [13]. Both training and evaluation were performed on RGB images with dimensions of $^{1025}$\u00d71025 pixels. For training, we only used one annotation in case of redundantly annotated pages. As one can observe, the variation in mAP between the models is rather low, but overall between 6 and 10% lower than the mAP computed from the pairwise human annotations on triple-annotated pages. This gives a good indication that the DocLayNet dataset poses a worthwhile challenge for the research community to close the gap between human recognition and ML approaches. It is interesting to see that Mask R-CNN and Faster R-CNN produce very comparable mAP scores, indicating that pixel-based image segmentation derived from bounding-boxes does not help to obtain better predictions. On the other hand, the more recent Yolov5x model does very well and even out-performs humans on selected labels such as $_{Text}$, Table and $_{Picture}$. This is not entirely surprising, as and Picture are abundant and the most visually distinctive in a document.", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/131", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 235.06893920898438, + "t": 370.8502197265625, + "r": 338.89947509765625, + "b": 285.920654296875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1144 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "Prediclion Derormance (up80.5-0.85 ohobeci detecion lalo ks Doclaynal Lest saL Ine VACNN (Mask R-CNNI and FACNN (Faster A-CNM) modcs mith PosNc: 50 PosNo: 101 backtone woro trainod based on Enc nchwwcrk achrocturos tom Ihc Oeronhroase a-CNn aso rioi-Fpn Jx, FasieA-Cnn a1o1-FPN Jx), wilh delaui conlwuralions The YoUg mpomorcabon utilzod w2s YoloSyb(13| modos woro inbalsod usino cro-trunodmonhts hron Coco 2017 datasor", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/132", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.3333435058594, + "t": 563.0970458984375, + "r": 527.1106567382812, + "b": 547.0772705078125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 419 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "iD avod Ihbs arcost cha unbasndbasolino numoc human cocumnnt-Laycut annotalion; Thrd Inirooucod leatura 0i snapoina Doxes around lerl scainunis cblan & pixel-accuiale annolaton and aJan feduce Bifre and elonThe CCS annoinbon aloMalca shruks Ovory Usor-drawnboro mnmum boundino-borarounaIho onclosod coxt-colls Purolytort basud scoitontwhich uxclldcs Ort Tatlo and Picluo latsor Inssucicdannjlabon sha mnim so inclusion Suitcurding mlospeco whloIncvon Oenoncang doans d0 oisnaocmnbors Onchse Ihal So10 wioogly Daisoc Pogcs Cannol be annotalcd coTcCEY and nccd supocd Foudn Oshdned Wuyio(aq Dagcs (ccclod Cases whcion valid anncuabon eccofding abeiqu Oelines coukbe acheneu Eamnole Case, flis wouk PDF peoe3 Ihal rendernnccrrecUy contanlavuta hat Imnosshk cantra milh Vananonnyogannio{ Suchiceciodoaoos not coralnon Ihofnn hr Aroknacoarreehetyn annollca slall nluuocd unnoln sina \" Puou lypical Lmnetamre 0l 20s 10 605 cecendnc conoanty", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/133", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 365.95367431640625, + "t": 447.0, + "r": 530.2679443359375, + "b": 405.3583984375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 934 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "Ine crimary goal OocVAYNo cblan hion-quality Modols AccuaiodoaMoiuvana4s WMeVanalon chalcnonglayoul: Cecurdg echon Doicdi Delccion modcb rtene Casistlo Usc, Quulo Hhndandiubon ground-vuth data COCO lornat [16] and avaladloy enetal Irarnenoiks uch derectrcnz7] Furnemmcre, baseline nmnoe < I Putun Notand DocBank calanodusnsundad coict dosnchonmodols such Mas< A CNN and Fasior A CNN SuEna blraomhdelecfa nonInr Canacle", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/135", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.0, + "t": 391.0, + "r": 529.8655395507812, + "b": 370.37261962890625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 418 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "Fauri Prco chon ocrloianC( 005-095) ola Mask A-CNN ncthoik ilh AcsNciSo backbono brainod on incrcasing Iracbons oi DocLaynei calasot Tne loannp auro altons around Ih0 \u20ac03 noicahino Ihal inxreasing /e 520 Q Iho DocL\u00f8y Nel dalasot Amardaen nol Ycid sn: dorOocC Chons LAD", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/136", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 365.9671936035156, + "t": 367.0, + "r": 528.6666870117188, + "b": 354.9878845214844, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 268 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "pangrandloave detallod evalvallon %moro rcoarimolhods monionan Secilg Jorhlure work", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/137", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 365.8995056152344, + "t": 351.3333435058594, + "r": 489.40869140625, + "b": 347.69952392578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 83 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "Inuhs sechon All Deseni seur8/ asoecis reles00 Perormanoe ouieci celec on DoxclayNet Simamtas In PLoLaynnt oyuato tnn qualmy cuthnlr crodictionsusiramnanavnna prncisicn (TTAP) wch IDovrdaos that rangn trom 0 5ta 005 (nap,o6-00: Ml olue Fnoula Cvurbar uvalaion coou piayIed DY Ihu COCO API/161 ook", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/138", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.1520690917969, + "t": 344.3362731933594, + "r": 527.7802124023438, + "b": 332.3333435058594, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 296 + ] + } + ] + } + ], + "headings": [ + "5 EXPERIMENTS" + ] + } + }, + { + "text": "ptesenl baselne expenrnenls (Qvenin MAF) on Mas< R-CNN /121 Fasler F-CNN [11] an1 YOLOvS [13] Bou1 brann anavailang woropomormod AGa Imnoos vith dimonsions 1025 chxrols For tralring onN usodomannolatln Incaso ohcuunourfhunnolulco Dac3 Ohenn Vuruhoninptalunhamagny usnaroA en hn 10?7 loworrnannomap conoutec paicaisehuman anncrbons Aoo-amculeopnnos Ins Cves nacaton thatrhe DocLayNot daasci DOfo s mornwro clagnoo [csoarcncomrurt gap bctwoon human focogniticn and VL aporoaces nlelesuio IharNaska-CNNead Fasler GNincroova comnanen Maseoes nnocauna Ulbi AICBasodnanc scomrorubon oormvod Irom bounon)ooros Ooo{ abuin totcrorcochons Ontho chornnno Mcrocconi YolavSrmrodel does verywell und even Dul-Perdorins selectedlubels such Tedle undpcturl enbeh surcrisio Ta oloandPchre poincant amimemostasiaIN ishinsine documen: Ouau hnne", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/140", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 365.9697570800781, + "t": 317.6666564941406, + "r": 529.27099609375, + "b": 280.0965881347656, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 825 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "8", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/141", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 303.0059509277344, + "t": 48.90887451171875, + "r": 308.49029541015625, + "b": 39.960079193115234, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "KDD '22, August 14-18, 2022, Washington, DC, USA Birgit Pfitzmann, Christoph Auer, Michele Dolfi, Ahmed S. Nassar, and Peter Staar", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/142", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.67599487304688, + "t": 598.9852294921875, + "r": 346.2541809082031, + "b": 593.6693115234375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 130 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the tripleannotated pages, from which we obtain accuracy ranges. Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurr ence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges. B", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/143", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.52484130859375, + "t": 586.8209228515625, + "r": 525.9969482421875, + "b": 561.3492431640625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 699 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "Figure 3: face. The laid te be drawn the respe", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/144", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "caption", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.67599487304688, + "t": 347.296630859375, + "r": 108.26393127441406, + "b": 318.76702880859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 46 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "we distribute d the annotation workload and performed continuous quality contr ols. Phase one and two required a small team of experts only. For phases three and four, a group of 40 dedicated annotators were assembled and supervised. Phase 1: Data selection and preparation. Our inclusion cri-", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/145", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.50696563720703, + "t": 306.8683776855469, + "r": 212.13279724121094, + "b": 277.8305358886719, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 293 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "of pages ed by seerties. For cument figur es or object how", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/146", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 327.33526611328125, + "t": 415.4449157714844, + "r": 347.025390625, + "b": 375.5401916503906, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 58 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "d the colfealayout labels. Pageand $_{Title}$. class cificity ed for of the ambiguous, while coverage ensures that all meaningful items on a page can be annotated. We refrained from class labels that are very specific to a document category, such as Abstract in the Scientific Articles category. We also avoided class labels that are tightly linked to the semantics of the text. Labels such as Author and $_{Affiliation}$, as seen", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/147", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "text", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 223.4002227783203, + "t": 370.67547607421875, + "r": 347.0276794433594, + "b": 280.1531982421875, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 430 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "teria for documents were described in Section 3. A large effort went into ensuring that all documents are free to use. The data sources in DocBank, are often only distinguishable by discriminating on $^{3}$https://arxiv.org/ Figure 4: Table 1 from the DocLayNet paper in the original PDF (A), as rendered Markdown (B) and in JSON representation (C). Spanning table cells, such as the multi-column header \"triple interannotator mAP@0.5-0.95 (%)\", is repeated for each column in the Markdown representation (B), which guarantees that every data point can be traced back to row and column headings only by its grid coordinates in the table. In the JSON representation, the span information is reflected in the fields of each table cell (C).", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/148", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "paragraph", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 88.67599487304688, + "t": 281.1365966796875, + "r": 504.1103515625, + "b": 213.95611572265625, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 737 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, + { + "text": "9", + "meta": { + "doc_items": [ + { + "self_ref": "#/texts/149", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "page_footer", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 302.54315185546875, + "t": 49.2738037109375, + "r": 308.49029541015625, + "b": 39.96010971069336, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 1 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + } + ] +} \ No newline at end of file diff --git a/test/test_chunk.py b/test/test_chunk.py deleted file mode 100644 index 83eda8a5..00000000 --- a/test/test_chunk.py +++ /dev/null @@ -1,16 +0,0 @@ -# -# Copyright IBM Corp. 2024 - 2024 -# SPDX-License-Identifier: MIT -# - -from docling_core.transforms.chunker.base import Chunk - - -def test_chunk_migration(): - input_path = "$.main-text[42]" # deprected path format - expected_path = "#/main-text/42" - chunk = Chunk( - path=input_path, - text="foo", - ) - assert chunk.path == expected_path diff --git a/test/test_hierarchical_chunker.py b/test/test_hierarchical_chunker.py index fe9cba10..7bd6f4c3 100644 --- a/test/test_hierarchical_chunker.py +++ b/test/test_hierarchical_chunker.py @@ -6,52 +6,33 @@ import json from docling_core.transforms.chunker import HierarchicalChunker -from docling_core.types import Document as DLDocument +from docling_core.transforms.chunker.hierarchical_chunker import Chunk +from docling_core.types.doc import DoclingDocument as DLDocument -def test_chunk_heading_in_text_wout_extras(): +def test_chunk_merge_list_items(): with open("test/data/chunker/0_inp_dl_doc.json") as f: data_json = f.read() dl_doc = DLDocument.model_validate_json(data_json) - chunker = HierarchicalChunker(heading_as_metadata=False, include_metadata=False) + chunker = HierarchicalChunker( + merge_list_items=True, + ) chunks = chunker.chunk(dl_doc=dl_doc) - act_data = dict(root=[n.model_dump(exclude_none=True) for n in chunks]) - with open("test/data/chunker/0_out_chunks_heading_in_text_wout_extras.json") as f: + act_data = dict(root=[Chunk.model_validate(n).export_json_dict() for n in chunks]) + with open("test/data/chunker/0_out_chunks.json") as f: exp_data = json.load(fp=f) assert exp_data == act_data -def test_chunk_heading_in_text_with_extras(): +def test_chunk_no_merge_list_items(): with open("test/data/chunker/0_inp_dl_doc.json") as f: data_json = f.read() dl_doc = DLDocument.model_validate_json(data_json) - chunker = HierarchicalChunker(heading_as_metadata=False, include_metadata=True) + chunker = HierarchicalChunker( + merge_list_items=False, + ) chunks = chunker.chunk(dl_doc=dl_doc) - act_data = dict(root=[n.model_dump(exclude_none=True) for n in chunks]) - with open("test/data/chunker/0_out_chunks_heading_in_text_with_extras.json") as f: - exp_data = json.load(fp=f) - assert exp_data == act_data - - -def test_chunk_heading_in_meta_wout_extras(): - with open("test/data/chunker/0_inp_dl_doc.json") as f: - data_json = f.read() - dl_doc = DLDocument.model_validate_json(data_json) - chunker = HierarchicalChunker(heading_as_metadata=True, include_metadata=False) - chunks = chunker.chunk(dl_doc=dl_doc) - act_data = dict(root=[n.model_dump(exclude_none=True) for n in chunks]) - with open("test/data/chunker/0_out_chunks_heading_in_meta_wout_extras.json") as f: - exp_data = json.load(fp=f) - assert exp_data == act_data - - -def test_chunk_heading_in_meta_with_extras(): - with open("test/data/chunker/0_inp_dl_doc.json") as f: - data_json = f.read() - dl_doc = DLDocument.model_validate_json(data_json) - chunker = HierarchicalChunker(heading_as_metadata=True, include_metadata=True) - chunks = chunker.chunk(dl_doc=dl_doc) - act_data = dict(root=[n.model_dump(exclude_none=True) for n in chunks]) - with open("test/data/chunker/0_out_chunks_heading_in_meta_with_extras.json") as f: + act_data = dict(root=[Chunk.model_validate(n).export_json_dict() for n in chunks]) + with open("test/data/chunker/1_out_chunks.json") as f: exp_data = json.load(fp=f) assert exp_data == act_data From eef8685d5af777e8c41a5f6a0f3a8227c2b3df75 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Wed, 16 Oct 2024 10:35:54 +0200 Subject: [PATCH 2/5] feat: add table support in chunker, incl. captions Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- .../chunker/hierarchical_chunker.py | 50 +++++- test/data/chunker/0_out_chunks.json | 145 +++++++++++++++++ test/data/chunker/1_out_chunks.json | 147 +++++++++++++++++- 3 files changed, 334 insertions(+), 8 deletions(-) diff --git a/docling_core/transforms/chunker/hierarchical_chunker.py b/docling_core/transforms/chunker/hierarchical_chunker.py index d9315f47..7feb0051 100644 --- a/docling_core/transforms/chunker/hierarchical_chunker.py +++ b/docling_core/transforms/chunker/hierarchical_chunker.py @@ -10,6 +10,7 @@ import logging from typing import Any, ClassVar, Iterator, Optional +from pandas import DataFrame from pydantic import BaseModel, Field, conlist from docling_core.transforms.chunker import BaseChunker @@ -20,15 +21,14 @@ LevelNumber, ListItem, SectionHeaderItem, + TableItem, TextItem, ) from docling_core.types.doc.labels import DocItemLabel -_KEY_PATHS = "paths" -_KEY_PROVS = "provs" -_KEY_HEADINGS = "headings" - _KEY_DOC_ITEMS = "doc_items" +_KEY_HEADINGS = "headings" +_KEY_CAPTIONS = "captions" _logger = logging.getLogger(__name__) @@ -38,13 +38,16 @@ class ChunkMeta(BaseModel): # TODO align paths typewith _JSON_POINTER_REGEX doc_items: conlist(DocItem, min_length=1) = Field( # type: ignore - default=None, alias=_KEY_DOC_ITEMS, ) headings: Optional[conlist(str, min_length=1)] = Field( # type: ignore default=None, alias=_KEY_HEADINGS, ) + captions: Optional[conlist(str, min_length=1)] = Field( # type: ignore + default=None, + alias=_KEY_CAPTIONS, + ) excluded_embed: ClassVar[list[str]] = [_KEY_DOC_ITEMS] excluded_llm: ClassVar[list[str]] = [_KEY_DOC_ITEMS] @@ -78,6 +81,28 @@ class HierarchicalChunker(BaseChunker): merge_list_items: bool = True delim: str = "\n" + @classmethod + def _triplet_serialize(cls, table_df: DataFrame) -> str: + + # copy header as first row and shift all rows by one + table_df.loc[-1] = table_df.columns # type: ignore[call-overload] + table_df.index = table_df.index + 1 + table_df = table_df.sort_index() + + rows = [item.strip() for item in table_df.iloc[:, 0].to_list()] + cols = [item.strip() for item in table_df.iloc[0, :].to_list()] + + nrows = table_df.shape[0] + ncols = table_df.shape[1] + texts = [ + f"{rows[i]}, {cols[j]} = {str(table_df.iloc[i, j]).strip()}" + for i in range(1, nrows) + for j in range(1, ncols) + ] + output_text = ". ".join(texts) + + return output_text + def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[BaseChunk]: r"""Chunk the provided document. @@ -90,9 +115,10 @@ def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[BaseChunk]: heading_by_level: dict[LevelNumber, str] = {} list_items: list[TextItem] = [] for item, level in dl_doc.iterate_items(): - + captions = None if isinstance(item, DocItem): + # first handle any merging needed if self.merge_list_items: if isinstance( item, ListItem @@ -136,14 +162,24 @@ def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[BaseChunk]: (not self.merge_list_items) and isinstance(item, ListItem) ): text = item.text + elif isinstance(item, TableItem): + table_df = item.export_to_dataframe() + if table_df.shape[0] < 1 or table_df.shape[1] < 2: + # at least two cols needed, as first column contains row headers + continue + text = self._triplet_serialize(table_df=table_df) + captions = [ + c.text for c in [r.resolve(dl_doc) for r in item.captions] + ] or None else: - continue # TODO refine to ignore some cases & raise otherwise? + continue c = Chunk( text=text, meta=ChunkMeta( doc_items=[item], headings=[heading_by_level[k] for k in sorted(heading_by_level)] or None, + captions=captions, ), ) yield c diff --git a/test/data/chunker/0_out_chunks.json b/test/data/chunker/0_out_chunks.json index 8cfdaf79..3261e1da 100644 --- a/test/data/chunker/0_out_chunks.json +++ b/test/data/chunker/0_out_chunks.json @@ -1444,6 +1444,43 @@ ] } }, + { + "text": "Apple M3 Max, Thread budget.Thread budget = 4. Apple M3 Max, native backend.TTS = 177 s. Apple M3 Max, native backend.Pages/s = 1.27. Apple M3 Max, native backend.Mem = 6.20 GB. Apple M3 Max, pypdfium backend.TTS = 103 s. Apple M3 Max, pypdfium backend.Pages/s = 2.18. Apple M3 Max, pypdfium backend.Mem = 2.56 GB. (16 cores), Thread budget.Thread budget = 16. (16 cores), native backend.TTS = 167 s. (16 cores), native backend.Pages/s = 1.34. (16 cores), native backend.Mem = 6.20 GB. (16 cores), pypdfium backend.TTS = 92 s. (16 cores), pypdfium backend.Pages/s = 2.45. (16 cores), pypdfium backend.Mem = 2.56 GB. Intel(R) Xeon E5-2690, Thread budget.Thread budget = 4 16. Intel(R) Xeon E5-2690, native backend.TTS = 375 s 244 s. Intel(R) Xeon E5-2690, native backend.Pages/s = 0.60 0.92. Intel(R) Xeon E5-2690, native backend.Mem = 6.16 GB. Intel(R) Xeon E5-2690, pypdfium backend.TTS = 239 s 143 s. Intel(R) Xeon E5-2690, pypdfium backend.Pages/s = 0.94 1.57. Intel(R) Xeon E5-2690, pypdfium backend.Mem = 2.42 GB", + "meta": { + "doc_items": [ + { + "self_ref": "#/tables/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "table", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 133.71340942382812, + "t": 635.0601806640625, + "r": 477.5060729980469, + "b": 542.3740844726562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ], + "captions": [ + "Table 1: Runtime characteristics of Docling with the standard model pipeline and settings, on our test dataset of 225 pages, on two different systems. OCR is disabled. We show the time-to-solution (TTS), computed throughput in pages per second, and the peak memory used (resident set size) for both the Docling-native PDF backend and for the pypdfium backend, using 4 and 16 threads." + ] + } + }, { "text": "Thanks to the high-quality, richly structured document conversion achieved by Docling, its output qualifies for numerous downstream applications. For example, Docling can provide a base for detailed enterprise document search, passage retrieval or classification use-cases, or support knowledge extraction pipelines, allowing specific treatment of different structures in the document, such as tables, figures, section structure or references. For popular generative AI application patterns, such as retrieval-augmented generation (RAG), we provide quackling , an open-source package which capitalizes on Docling's feature-rich document output to enable document-native optimized vector embedding and chunking. It plugs in seamlessly with LLM frameworks such as LlamaIndex [8]. Since Docling is fast, stable and cheap to run, it also makes for an excellent choice to build document-derived datasets. With its powerful table structure recognition, it provides significant benefit to automated knowledge-base construction [11, 10]. Docling is also integrated within the open IBM data prep kit [6], which implements scalable data transforms to build large-scale multi-modal training datasets.", "meta": { @@ -3072,6 +3109,43 @@ ] } }, + { + "text": "Caption, human.human = 84-89. Caption, MRCNN.R50 = 68.4. Caption, MRCNN.R101 = 71.5. Caption, FRCNN.R101 = 70.1. Caption, YOLO.v5x6 = 77.7. Footnote, human.human = 83-91. Footnote, MRCNN.R50 = 70.9. Footnote, MRCNN.R101 = 71.8. Footnote, FRCNN.R101 = 73.7. Footnote, YOLO.v5x6 = 77.2. Formula, human.human = 83-85. Formula, MRCNN.R50 = 60.1. Formula, MRCNN.R101 = 63.4. Formula, FRCNN.R101 = 63.5. Formula, YOLO.v5x6 = 66.2. List-item, human.human = 87-88. List-item, MRCNN.R50 = 81.2. List-item, MRCNN.R101 = 80.8. List-item, FRCNN.R101 = 81.0. List-item, YOLO.v5x6 = 86.2. Page-footer, human.human = 93-94. Page-footer, MRCNN.R50 = 61.6. Page-footer, MRCNN.R101 = 59.3. Page-footer, FRCNN.R101 = 58.9. Page-footer, YOLO.v5x6 = 61.1. Page-header, human.human = 85-89. Page-header, MRCNN.R50 = 71.9. Page-header, MRCNN.R101 = 70.0. Page-header, FRCNN.R101 = 72.0. Page-header, YOLO.v5x6 = 67.9. Picture, human.human = 69-71. Picture, MRCNN.R50 = 71.7. Picture, MRCNN.R101 = 72.7. Picture, FRCNN.R101 = . Picture, YOLO.v5x6 = 77.1. Section-header, human.human = 83-84. Section-header, MRCNN.R50 = 67.6. Section-header, MRCNN.R101 = 69.3. Section-header, FRCNN.R101 = 68.4. Section-header, YOLO.v5x6 = 74.6. Table, human.human = 77-81. Table, MRCNN.R50 = 82.2. Table, MRCNN.R101 = 82.9. Table, FRCNN.R101 = 82.2. Table, YOLO.v5x6 = 86.3. Text, human.human = 84-86. Text, MRCNN.R50 = 84.6. Text, MRCNN.R101 = 85.8. Text, FRCNN.R101 = 85.4. Text, YOLO.v5x6 = . , human.human = . , MRCNN.R50 = 76.7. , MRCNN.R101 = 80.4. , FRCNN.R101 = 79.9. , YOLO.v5x6 = 88.1. Title, human.human = 60-72. Title, MRCNN.R50 = . Title, MRCNN.R101 = . Title, FRCNN.R101 = . Title, YOLO.v5x6 = 82.7. All, human.human = 82-83. All, MRCNN.R50 = 72.4. All, MRCNN.R101 = 73.5. All, FRCNN.R101 = 73.4. All, YOLO.v5x6 = 76.8", + "meta": { + "doc_items": [ + { + "self_ref": "#/tables/1", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "table", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 125.886474609375, + "t": 505.5043640136719, + "r": 223.0053253173828, + "b": 437.8017578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ], + "captions": [ + "Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset." + ] + } + }, { "text": "to avoid this at any cost in order to have clear, unbiased baseline numbers for human document-layout annotation. Third, we introduced the feature of snapping boxes around text segments to obtain a pixel-accurate annotation and again reduce time and effort. The CCS annotation tool automatically shrinks every user-drawn box to the minimum bounding-box around the enclosed text-cells for all purely text-based segments, which excludes only Table and $_{Picture}$. For the latter, we instructed annotation staffto minimise inclusion of surrounding whitespace while including all graphical lines. A downside of snapping boxes to enclosed text cells is that some wrongly parsed PDF pages cannot be annotated correctly and need to be skipped. Fourth, we established a way toflag pages as rejected for cases where no valid annotation according to the label guidelines could be achieved. Example cases for this would be PDF pages that render incorrectly or contain layouts that are impossible to capture with non-overlapping rectangles. Such rejected pages are not contained in thefinal dataset. With all these measures in place, experienced annotation staffmanaged to annotate a single page in a typical timeframe of 20s to 60s, depending on its complexity.", "meta": { @@ -3344,6 +3418,40 @@ ] } }, + { + "text": "Gaoon, noun = . Gaoon, Mrcnn = . Gaoon, MaCNN = . Gaoon, Frcne = . Gaoon, Yolo = . Foomolo, noun = . Foomolo, Mrcnn = . Foomolo, MaCNN = . Foomolo, Frcne = . Foomolo, Yolo = . Foula, noun = . Foula, Mrcnn = . Foula, MaCNN = . Foula, Frcne = . Foula, Yolo = . Ust-lern, noun = . Ust-lern, Mrcnn = . Ust-lern, MaCNN = . Ust-lern, Frcne = . Ust-lern, Yolo = . Page-locer, noun = . Page-locer, Mrcnn = . Page-locer, MaCNN = . Page-locer, Frcne = . Page-locer, Yolo = . Faqe-haje, noun = . Faqe-haje, Mrcnn = . Faqe-haje, MaCNN = . Faqe-haje, Frcne = . Faqe-haje, Yolo = . Pxlu, noun = . Pxlu, Mrcnn = . Pxlu, MaCNN = . Pxlu, Frcne = . Pxlu, Yolo = . Sonhoade, noun = . Sonhoade, Mrcnn = . Sonhoade, MaCNN = . Sonhoade, Frcne = . Sonhoade, Yolo = ", + "meta": { + "doc_items": [ + { + "self_ref": "#/tables/2", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "table", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.86639404296875, + "t": 542.9662475585938, + "r": 460.80865478515625, + "b": 450.93499755859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, { "text": "iD avod Ihbs arcost cha unbasndbasolino numoc human cocumnnt-Laycut annotalion; Thrd Inirooucod leatura 0i snapoina Doxes around lerl scainunis cblan & pixel-accuiale annolaton and aJan feduce Bifre and elonThe CCS annoinbon aloMalca shruks Ovory Usor-drawnboro mnmum boundino-borarounaIho onclosod coxt-colls Purolytort basud scoitontwhich uxclldcs Ort Tatlo and Picluo latsor Inssucicdannjlabon sha mnim so inclusion Suitcurding mlospeco whloIncvon Oenoncang doans d0 oisnaocmnbors Onchse Ihal So10 wioogly Daisoc Pogcs Cannol be annotalcd coTcCEY and nccd supocd Foudn Oshdned Wuyio(aq Dagcs (ccclod Cases whcion valid anncuabon eccofding abeiqu Oelines coukbe acheneu Eamnole Case, flis wouk PDF peoe3 Ihal rendernnccrrecUy contanlavuta hat Imnosshk cantra milh Vananonnyogannio{ Suchiceciodoaoos not coralnon Ihofnn hr Aroknacoarreehetyn annollca slall nluuocd unnoln sina \" Puou lypical Lmnetamre 0l 20s 10 605 cecendnc conoanty", "meta": { @@ -3650,6 +3758,43 @@ ] } }, + { + "text": "Caption, Count. = 22524. Caption, % of Total.Train = 2.04. Caption, % of Total.Test = 1.77. Caption, % of Total.Val = 2.32. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).All = 84-89. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 40-61. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 86-92. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 94-99. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 95-99. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 69-78. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).T en = n/a. Footnote, Count. = 6318. Footnote, % of Total.Train = 0.60. Footnote, % of Total.Test = 0.31. Footnote, % of Total.Val = 0.58. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).All = 83-91. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = n/a. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 100. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 62-88. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 85-94. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = n/a. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 82-97. Formula, Count. = 25027. Formula, % of Total.Train = 2.25. Formula, % of Total.Test = 1.90. Formula, % of Total.Val = 2.96. Formula, triple inter-annotator mAP @ 0.5-0.95 (%).All = 83-85. Formula, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = . Formula, triple inter-annotator mAP @ 0.5-0.95 (%).Man = n/a. Formula, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 84-87. Formula, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 86-96. Formula, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = . Formula, triple inter-annotator mAP @ 0.5-0.95 (%).T en = n/a. List-item, Count. = 185660. List-item, % of Total.Train = 17.19. List-item, % of Total.Test = 13.34. List-item, % of Total.Val = 15.82. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).All = 87-88. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 74-83. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 90-92. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 97-97. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 81-85. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 75-88. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 93-95. Page-footer, Count. = 70878. Page-footer, % of Total.Train = 6.51. Page-footer, % of Total.Test = 5.58. Page-footer, % of Total.Val = 6.00. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).All = 93-94. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 88-90. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 95-96. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 100. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 92-97. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 100. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 96-98. Page-header, Count. = 58022. Page-header, % of Total.Train = 5.10. Page-header, % of Total.Test = 6.70. Page-header, % of Total.Val = 5.06. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).All = 85-89. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 66-76. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 90-94. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 98-100. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 91-92. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 97-99. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 81-86. Picture, Count. = 45976. Picture, % of Total.Train = 4.21. Picture, % of Total.Test = 2.78. Picture, % of Total.Val = 5.31. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).All = 69-71. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 56-59. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 82-86. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 69-82. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 80-95. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 66-71. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 59-76. Section-header, Count. = 142884. Section-header, % of Total.Train = 12.60. Section-header, % of Total.Test = 15.77. Section-header, % of Total.Val = 12.85. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).All = 83-84. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 76-81. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 90-92. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 94-95. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 87-94. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 69-73. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 78-86. Table, Count. = 34733. Table, % of Total.Train = 3.20. Table, % of Total.Test = 2.27. Table, % of Total.Val = 3.60. Table, triple inter-annotator mAP @ 0.5-0.95 (%).All = 77-81. Table, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 75-80. Table, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 83-86. Table, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 98-99. Table, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 58-80. Table, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 79-84. Table, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 70-85. Text, Count. = 510377. Text, % of Total.Train = 45.82. Text, % of Total.Test = 49.28. Text, % of Total.Val = 45.00. Text, triple inter-annotator mAP @ 0.5-0.95 (%).All = 84-86. Text, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 81-86. Text, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 88-93. Text, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 89-93. Text, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 87-92. Text, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 71-79. Text, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 87-95. Title, Count. = 5071. Title, % of Total.Train = 0.47. Title, % of Total.Test = 0.30. Title, % of Total.Val = 0.50. Title, triple inter-annotator mAP @ 0.5-0.95 (%).All = 60-72. Title, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 24-63. Title, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 50-63. Title, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 94-100. Title, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 82-96. Title, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 68-79. Title, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 24-56. Total, Count. = 1107470. Total, % of Total.Train = 941123. Total, % of Total.Test = 99816. Total, % of Total.Val = 66531. Total, triple inter-annotator mAP @ 0.5-0.95 (%).All = 82-83. Total, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 71-74. Total, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 79-81. Total, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 89-94. Total, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 86-91. Total, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 71-76. Total, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 68-85", + "meta": { + "doc_items": [ + { + "self_ref": "#/tables/3", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "table", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 110.8310546875, + "t": 560.6348876953125, + "r": 323.9291076660156, + "b": 477.7417297363281, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ], + "captions": [ + "Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the tripleannotated pages, from which we obtain accuracy ranges. Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurr ence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges. B" + ] + } + }, { "text": "Figure 3: face. The laid te be drawn the respe", "meta": { diff --git a/test/data/chunker/1_out_chunks.json b/test/data/chunker/1_out_chunks.json index 005cd848..43130ac1 100644 --- a/test/data/chunker/1_out_chunks.json +++ b/test/data/chunker/1_out_chunks.json @@ -1494,6 +1494,43 @@ ] } }, + { + "text": "Apple M3 Max, Thread budget.Thread budget = 4. Apple M3 Max, native backend.TTS = 177 s. Apple M3 Max, native backend.Pages/s = 1.27. Apple M3 Max, native backend.Mem = 6.20 GB. Apple M3 Max, pypdfium backend.TTS = 103 s. Apple M3 Max, pypdfium backend.Pages/s = 2.18. Apple M3 Max, pypdfium backend.Mem = 2.56 GB. (16 cores), Thread budget.Thread budget = 16. (16 cores), native backend.TTS = 167 s. (16 cores), native backend.Pages/s = 1.34. (16 cores), native backend.Mem = 6.20 GB. (16 cores), pypdfium backend.TTS = 92 s. (16 cores), pypdfium backend.Pages/s = 2.45. (16 cores), pypdfium backend.Mem = 2.56 GB. Intel(R) Xeon E5-2690, Thread budget.Thread budget = 4 16. Intel(R) Xeon E5-2690, native backend.TTS = 375 s 244 s. Intel(R) Xeon E5-2690, native backend.Pages/s = 0.60 0.92. Intel(R) Xeon E5-2690, native backend.Mem = 6.16 GB. Intel(R) Xeon E5-2690, pypdfium backend.TTS = 239 s 143 s. Intel(R) Xeon E5-2690, pypdfium backend.Pages/s = 0.94 1.57. Intel(R) Xeon E5-2690, pypdfium backend.Mem = 2.42 GB", + "meta": { + "doc_items": [ + { + "self_ref": "#/tables/0", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "table", + "prov": [ + { + "page_no": 5, + "bbox": { + "l": 133.71340942382812, + "t": 635.0601806640625, + "r": 477.5060729980469, + "b": 542.3740844726562, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ] + } + ], + "headings": [ + "4 Performance" + ], + "captions": [ + "Table 1: Runtime characteristics of Docling with the standard model pipeline and settings, on our test dataset of 225 pages, on two different systems. OCR is disabled. We show the time-to-solution (TTS), computed throughput in pages per second, and the peak memory used (resident set size) for both the Docling-native PDF backend and for the pypdfium backend, using 4 and 16 threads." + ] + } + }, { "text": "Thanks to the high-quality, richly structured document conversion achieved by Docling, its output qualifies for numerous downstream applications. For example, Docling can provide a base for detailed enterprise document search, passage retrieval or classification use-cases, or support knowledge extraction pipelines, allowing specific treatment of different structures in the document, such as tables, figures, section structure or references. For popular generative AI application patterns, such as retrieval-augmented generation (RAG), we provide quackling , an open-source package which capitalizes on Docling's feature-rich document output to enable document-native optimized vector embedding and chunking. It plugs in seamlessly with LLM frameworks such as LlamaIndex [8]. Since Docling is fast, stable and cheap to run, it also makes for an excellent choice to build document-derived datasets. With its powerful table structure recognition, it provides significant benefit to automated knowledge-base construction [11, 10]. Docling is also integrated within the open IBM data prep kit [6], which implements scalable data transforms to build large-scale multi-modal training datasets.", "meta": { @@ -3262,6 +3299,43 @@ ] } }, + { + "text": "Caption, human.human = 84-89. Caption, MRCNN.R50 = 68.4. Caption, MRCNN.R101 = 71.5. Caption, FRCNN.R101 = 70.1. Caption, YOLO.v5x6 = 77.7. Footnote, human.human = 83-91. Footnote, MRCNN.R50 = 70.9. Footnote, MRCNN.R101 = 71.8. Footnote, FRCNN.R101 = 73.7. Footnote, YOLO.v5x6 = 77.2. Formula, human.human = 83-85. Formula, MRCNN.R50 = 60.1. Formula, MRCNN.R101 = 63.4. Formula, FRCNN.R101 = 63.5. Formula, YOLO.v5x6 = 66.2. List-item, human.human = 87-88. List-item, MRCNN.R50 = 81.2. List-item, MRCNN.R101 = 80.8. List-item, FRCNN.R101 = 81.0. List-item, YOLO.v5x6 = 86.2. Page-footer, human.human = 93-94. Page-footer, MRCNN.R50 = 61.6. Page-footer, MRCNN.R101 = 59.3. Page-footer, FRCNN.R101 = 58.9. Page-footer, YOLO.v5x6 = 61.1. Page-header, human.human = 85-89. Page-header, MRCNN.R50 = 71.9. Page-header, MRCNN.R101 = 70.0. Page-header, FRCNN.R101 = 72.0. Page-header, YOLO.v5x6 = 67.9. Picture, human.human = 69-71. Picture, MRCNN.R50 = 71.7. Picture, MRCNN.R101 = 72.7. Picture, FRCNN.R101 = . Picture, YOLO.v5x6 = 77.1. Section-header, human.human = 83-84. Section-header, MRCNN.R50 = 67.6. Section-header, MRCNN.R101 = 69.3. Section-header, FRCNN.R101 = 68.4. Section-header, YOLO.v5x6 = 74.6. Table, human.human = 77-81. Table, MRCNN.R50 = 82.2. Table, MRCNN.R101 = 82.9. Table, FRCNN.R101 = 82.2. Table, YOLO.v5x6 = 86.3. Text, human.human = 84-86. Text, MRCNN.R50 = 84.6. Text, MRCNN.R101 = 85.8. Text, FRCNN.R101 = 85.4. Text, YOLO.v5x6 = . , human.human = . , MRCNN.R50 = 76.7. , MRCNN.R101 = 80.4. , FRCNN.R101 = 79.9. , YOLO.v5x6 = 88.1. Title, human.human = 60-72. Title, MRCNN.R50 = . Title, MRCNN.R101 = . Title, FRCNN.R101 = . Title, YOLO.v5x6 = 82.7. All, human.human = 82-83. All, MRCNN.R50 = 72.4. All, MRCNN.R101 = 73.5. All, FRCNN.R101 = 73.4. All, YOLO.v5x6 = 76.8", + "meta": { + "doc_items": [ + { + "self_ref": "#/tables/1", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "table", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 125.886474609375, + "t": 505.5043640136719, + "r": 223.0053253173828, + "b": 437.8017578125, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ] + } + ], + "headings": [ + "ACM Reference Format:" + ], + "captions": [ + "Table 2: Prediction performance (mAP@0.5-0.95) of object detection networks on DocLayNet test set. The MRCNN (Mask R-CNN) and FRCNN (Faster R-CNN) models with ResNet-50 or ResNet-101 backbone were trained based on the network architectures from the detectron2 model zoo (Mask R-CNN R50, R101-FPN 3x, Faster R-CNN R101-FPN 3x), with default configurations. The YOLO implementation utilized was YOLOv5x6 [13]. All models were initialised using pre-trained weights from the COCO 2017 dataset." + ] + } + }, { "text": "to avoid this at any cost in order to have clear, unbiased baseline numbers for human document-layout annotation. Third, we introduced the feature of snapping boxes around text segments to obtain a pixel-accurate annotation and again reduce time and effort. The CCS annotation tool automatically shrinks every user-drawn box to the minimum bounding-box around the enclosed text-cells for all purely text-based segments, which excludes only Table and $_{Picture}$. For the latter, we instructed annotation staffto minimise inclusion of surrounding whitespace while including all graphical lines. A downside of snapping boxes to enclosed text cells is that some wrongly parsed PDF pages cannot be annotated correctly and need to be skipped. Fourth, we established a way toflag pages as rejected for cases where no valid annotation according to the label guidelines could be achieved. Example cases for this would be PDF pages that render incorrectly or contain layouts that are impossible to capture with non-overlapping rectangles. Such rejected pages are not contained in thefinal dataset. With all these measures in place, experienced annotation staffmanaged to annotate a single page in a typical timeframe of 20s to 60s, depending on its complexity.", "meta": { @@ -3534,6 +3608,40 @@ ] } }, + { + "text": "Gaoon, noun = . Gaoon, Mrcnn = . Gaoon, MaCNN = . Gaoon, Frcne = . Gaoon, Yolo = . Foomolo, noun = . Foomolo, Mrcnn = . Foomolo, MaCNN = . Foomolo, Frcne = . Foomolo, Yolo = . Foula, noun = . Foula, Mrcnn = . Foula, MaCNN = . Foula, Frcne = . Foula, Yolo = . Ust-lern, noun = . Ust-lern, Mrcnn = . Ust-lern, MaCNN = . Ust-lern, Frcne = . Ust-lern, Yolo = . Page-locer, noun = . Page-locer, Mrcnn = . Page-locer, MaCNN = . Page-locer, Frcne = . Page-locer, Yolo = . Faqe-haje, noun = . Faqe-haje, Mrcnn = . Faqe-haje, MaCNN = . Faqe-haje, Frcne = . Faqe-haje, Yolo = . Pxlu, noun = . Pxlu, Mrcnn = . Pxlu, MaCNN = . Pxlu, Frcne = . Pxlu, Yolo = . Sonhoade, noun = . Sonhoade, Mrcnn = . Sonhoade, MaCNN = . Sonhoade, Frcne = . Sonhoade, Yolo = ", + "meta": { + "doc_items": [ + { + "self_ref": "#/tables/2", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "table", + "prov": [ + { + "page_no": 8, + "bbox": { + "l": 366.86639404296875, + "t": 542.9662475585938, + "r": 460.80865478515625, + "b": 450.93499755859375, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ] + } + }, { "text": "iD avod Ihbs arcost cha unbasndbasolino numoc human cocumnnt-Laycut annotalion; Thrd Inirooucod leatura 0i snapoina Doxes around lerl scainunis cblan & pixel-accuiale annolaton and aJan feduce Bifre and elonThe CCS annoinbon aloMalca shruks Ovory Usor-drawnboro mnmum boundino-borarounaIho onclosod coxt-colls Purolytort basud scoitontwhich uxclldcs Ort Tatlo and Picluo latsor Inssucicdannjlabon sha mnim so inclusion Suitcurding mlospeco whloIncvon Oenoncang doans d0 oisnaocmnbors Onchse Ihal So10 wioogly Daisoc Pogcs Cannol be annotalcd coTcCEY and nccd supocd Foudn Oshdned Wuyio(aq Dagcs (ccclod Cases whcion valid anncuabon eccofding abeiqu Oelines coukbe acheneu Eamnole Case, flis wouk PDF peoe3 Ihal rendernnccrrecUy contanlavuta hat Imnosshk cantra milh Vananonnyogannio{ Suchiceciodoaoos not coralnon Ihofnn hr Aroknacoarreehetyn annollca slall nluuocd unnoln sina \" Puou lypical Lmnetamre 0l 20s 10 605 cecendnc conoanty", "meta": { @@ -3840,6 +3948,43 @@ ] } }, + { + "text": "Caption, Count. = 22524. Caption, % of Total.Train = 2.04. Caption, % of Total.Test = 1.77. Caption, % of Total.Val = 2.32. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).All = 84-89. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 40-61. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 86-92. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 94-99. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 95-99. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 69-78. Caption, triple inter-annotator mAP @ 0.5-0.95 (%).T en = n/a. Footnote, Count. = 6318. Footnote, % of Total.Train = 0.60. Footnote, % of Total.Test = 0.31. Footnote, % of Total.Val = 0.58. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).All = 83-91. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = n/a. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 100. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 62-88. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 85-94. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = n/a. Footnote, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 82-97. Formula, Count. = 25027. Formula, % of Total.Train = 2.25. Formula, % of Total.Test = 1.90. Formula, % of Total.Val = 2.96. Formula, triple inter-annotator mAP @ 0.5-0.95 (%).All = 83-85. Formula, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = . Formula, triple inter-annotator mAP @ 0.5-0.95 (%).Man = n/a. Formula, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 84-87. Formula, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 86-96. Formula, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = . Formula, triple inter-annotator mAP @ 0.5-0.95 (%).T en = n/a. List-item, Count. = 185660. List-item, % of Total.Train = 17.19. List-item, % of Total.Test = 13.34. List-item, % of Total.Val = 15.82. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).All = 87-88. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 74-83. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 90-92. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 97-97. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 81-85. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 75-88. List-item, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 93-95. Page-footer, Count. = 70878. Page-footer, % of Total.Train = 6.51. Page-footer, % of Total.Test = 5.58. Page-footer, % of Total.Val = 6.00. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).All = 93-94. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 88-90. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 95-96. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 100. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 92-97. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 100. Page-footer, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 96-98. Page-header, Count. = 58022. Page-header, % of Total.Train = 5.10. Page-header, % of Total.Test = 6.70. Page-header, % of Total.Val = 5.06. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).All = 85-89. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 66-76. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 90-94. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 98-100. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 91-92. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 97-99. Page-header, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 81-86. Picture, Count. = 45976. Picture, % of Total.Train = 4.21. Picture, % of Total.Test = 2.78. Picture, % of Total.Val = 5.31. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).All = 69-71. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 56-59. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 82-86. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 69-82. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 80-95. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 66-71. Picture, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 59-76. Section-header, Count. = 142884. Section-header, % of Total.Train = 12.60. Section-header, % of Total.Test = 15.77. Section-header, % of Total.Val = 12.85. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).All = 83-84. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 76-81. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 90-92. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 94-95. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 87-94. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 69-73. Section-header, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 78-86. Table, Count. = 34733. Table, % of Total.Train = 3.20. Table, % of Total.Test = 2.27. Table, % of Total.Val = 3.60. Table, triple inter-annotator mAP @ 0.5-0.95 (%).All = 77-81. Table, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 75-80. Table, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 83-86. Table, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 98-99. Table, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 58-80. Table, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 79-84. Table, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 70-85. Text, Count. = 510377. Text, % of Total.Train = 45.82. Text, % of Total.Test = 49.28. Text, % of Total.Val = 45.00. Text, triple inter-annotator mAP @ 0.5-0.95 (%).All = 84-86. Text, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 81-86. Text, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 88-93. Text, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 89-93. Text, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 87-92. Text, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 71-79. Text, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 87-95. Title, Count. = 5071. Title, % of Total.Train = 0.47. Title, % of Total.Test = 0.30. Title, % of Total.Val = 0.50. Title, triple inter-annotator mAP @ 0.5-0.95 (%).All = 60-72. Title, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 24-63. Title, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 50-63. Title, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 94-100. Title, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 82-96. Title, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 68-79. Title, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 24-56. Total, Count. = 1107470. Total, % of Total.Train = 941123. Total, % of Total.Test = 99816. Total, % of Total.Val = 66531. Total, triple inter-annotator mAP @ 0.5-0.95 (%).All = 82-83. Total, triple inter-annotator mAP @ 0.5-0.95 (%).Fin = 71-74. Total, triple inter-annotator mAP @ 0.5-0.95 (%).Man = 79-81. Total, triple inter-annotator mAP @ 0.5-0.95 (%).Sci = 89-94. Total, triple inter-annotator mAP @ 0.5-0.95 (%).Law = 86-91. Total, triple inter-annotator mAP @ 0.5-0.95 (%).Pat = 71-76. Total, triple inter-annotator mAP @ 0.5-0.95 (%).T en = 68-85", + "meta": { + "doc_items": [ + { + "self_ref": "#/tables/3", + "parent": { + "$ref": "#/body" + }, + "children": [], + "label": "table", + "prov": [ + { + "page_no": 9, + "bbox": { + "l": 110.8310546875, + "t": 560.6348876953125, + "r": 323.9291076660156, + "b": 477.7417297363281, + "coord_origin": "BOTTOMLEFT" + }, + "charspan": [ + 0, + 0 + ] + } + ] + } + ], + "headings": [ + "Baselines for Object Detection" + ], + "captions": [ + "Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurrence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the tripleannotated pages, from which we obtain accuracy ranges. Table 1: DocLayNet dataset overview. Along with the frequency of each class label, we present the relative occurr ence (as % of row \"Total\") in the train, test and validation sets. The inter-annotator agreement is computed as the mAP@0.5-0.95 metric between pairwise annotations from the triple-annotated pages, from which we obtain accuracy ranges. B" + ] + } + }, { "text": "Figure 3: face. The laid te be drawn the respe", "meta": { @@ -4045,4 +4190,4 @@ } } ] -} \ No newline at end of file +} From 8e7f4448c2a76d772b370598eaaf9c8db713ee9b Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:01:18 +0200 Subject: [PATCH 3/5] use Field constraints instead of conlist, refactor chunking types Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- docling_core/transforms/chunker/__init__.py | 7 +-- docling_core/transforms/chunker/base.py | 31 +++++++-- .../chunker/hierarchical_chunker.py | 63 ++++++++----------- .../transforms/id_generator/__init__.py | 9 --- docling_core/transforms/id_generator/base.py | 30 --------- .../transforms/id_generator/uuid_generator.py | 34 ---------- .../transforms/metadata_extractor/__init__.py | 10 --- .../transforms/metadata_extractor/base.py | 59 ----------------- test/test_hierarchical_chunker.py | 10 ++- 9 files changed, 63 insertions(+), 190 deletions(-) delete mode 100644 docling_core/transforms/id_generator/__init__.py delete mode 100644 docling_core/transforms/id_generator/base.py delete mode 100644 docling_core/transforms/id_generator/uuid_generator.py delete mode 100644 docling_core/transforms/metadata_extractor/__init__.py delete mode 100644 docling_core/transforms/metadata_extractor/base.py diff --git a/docling_core/transforms/chunker/__init__.py b/docling_core/transforms/chunker/__init__.py index 8b5585ba..3407614e 100644 --- a/docling_core/transforms/chunker/__init__.py +++ b/docling_core/transforms/chunker/__init__.py @@ -5,8 +5,5 @@ """Define the chunker types.""" -from docling_core.transforms.chunker.base import BaseChunk, BaseChunker # noqa -from docling_core.transforms.chunker.hierarchical_chunker import ( # noqa - Chunk, - HierarchicalChunker, -) +from docling_core.transforms.chunker.base import BaseChunk, BaseChunker, BaseMeta +from docling_core.transforms.chunker.hierarchical_chunker import HierarchicalChunker diff --git a/docling_core/transforms/chunker/base.py b/docling_core/transforms/chunker/base.py index cf39518c..1bc3a36f 100644 --- a/docling_core/transforms/chunker/base.py +++ b/docling_core/transforms/chunker/base.py @@ -5,22 +5,45 @@ """Define base classes for chunking.""" from abc import ABC, abstractmethod -from typing import Any, Iterator +from typing import Any, ClassVar, Iterator from pydantic import BaseModel from docling_core.types.doc import DoclingDocument as DLDocument +class BaseMeta(BaseModel): + """Metadata base class.""" + + excluded_embed: ClassVar[list[str]] = [] + excluded_llm: ClassVar[list[str]] = [] + + def export_json_dict(self) -> dict[str, Any]: + """Helper method for exporting non-None keys to JSON mode. + + Returns: + dict[str, Any]: The exported dictionary. + """ + return self.model_dump(mode="json", by_alias=True, exclude_none=True) + + class BaseChunk(BaseModel): - """Data model for base chunk.""" + """Chunk base class.""" text: str - meta: Any = None + meta: BaseMeta + + def export_json_dict(self) -> dict[str, Any]: + """Helper method for exporting non-None keys to JSON mode. + + Returns: + dict[str, Any]: The exported dictionary. + """ + return self.model_dump(mode="json", by_alias=True, exclude_none=True) class BaseChunker(BaseModel, ABC): - """Base class for Chunker.""" + """Chunker base class.""" @abstractmethod def chunk(self, dl_doc: DLDocument, **kwargs) -> Iterator[BaseChunk]: diff --git a/docling_core/transforms/chunker/hierarchical_chunker.py b/docling_core/transforms/chunker/hierarchical_chunker.py index 7feb0051..f2b7cc46 100644 --- a/docling_core/transforms/chunker/hierarchical_chunker.py +++ b/docling_core/transforms/chunker/hierarchical_chunker.py @@ -11,10 +11,9 @@ from typing import Any, ClassVar, Iterator, Optional from pandas import DataFrame -from pydantic import BaseModel, Field, conlist +from pydantic import Field -from docling_core.transforms.chunker import BaseChunker -from docling_core.transforms.chunker.base import BaseChunk +from docling_core.transforms.chunker import BaseChunk, BaseChunker, BaseMeta from docling_core.types.doc import DoclingDocument as DLDocument from docling_core.types.doc.document import ( DocItem, @@ -33,50 +32,42 @@ _logger = logging.getLogger(__name__) -class ChunkMeta(BaseModel): - """Data model for specific chunk metadata.""" +class DocMeta(BaseMeta): + """Data model for Hierarchical Chunker metadata.""" - # TODO align paths typewith _JSON_POINTER_REGEX - doc_items: conlist(DocItem, min_length=1) = Field( # type: ignore + doc_items: list[DocItem] = Field( alias=_KEY_DOC_ITEMS, + min_length=1, ) - headings: Optional[conlist(str, min_length=1)] = Field( # type: ignore + headings: Optional[list[str]] = Field( default=None, alias=_KEY_HEADINGS, + min_length=1, ) - captions: Optional[conlist(str, min_length=1)] = Field( # type: ignore + captions: Optional[list[str]] = Field( default=None, alias=_KEY_CAPTIONS, + min_length=1, ) excluded_embed: ClassVar[list[str]] = [_KEY_DOC_ITEMS] excluded_llm: ClassVar[list[str]] = [_KEY_DOC_ITEMS] - def export_json_dict(self) -> dict[str, Any]: - """Helper method for exporting non-None keys to JSON mode. - - Returns: - dict[str, Any]: The exported dictionary. - """ - return self.model_dump(mode="json", by_alias=True, exclude_none=True) +class DocChunk(BaseChunk): + """Data model for Hierarchical Chunker chunks.""" -class Chunk(BaseChunk): - """Data model for specific chunk.""" - - meta: ChunkMeta - - def export_json_dict(self) -> dict[str, Any]: - """Helper method for exporting non-None keys to JSON mode. - - Returns: - dict[str, Any]: The exported dictionary. - """ - return self.model_dump(mode="json", by_alias=True, exclude_none=True) + meta: DocMeta class HierarchicalChunker(BaseChunker): - """Chunker implementation leveraging the document layout.""" + r"""Chunker implementation leveraging the document layout. + + Args: + merge_list_items (bool): Whether to merge successive list items. + Defaults to True. + delim (str): Delimiter to use for merging text. Defaults to "\n". + """ merge_list_items: bool = True delim: str = "\n" @@ -129,9 +120,9 @@ def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[BaseChunk]: list_items.append(item) continue elif list_items: # need to yield - yield Chunk( + yield DocChunk( text=self.delim.join([i.text for i in list_items]), - meta=ChunkMeta( + meta=DocMeta( doc_items=list_items, headings=[ heading_by_level[k] @@ -148,7 +139,7 @@ def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[BaseChunk]: isinstance(item, TextItem) and item.label == DocItemLabel.SECTION_HEADER ): - # TODO second branch not needed after cleanup above: + # TODO second branch not needed once cleanup above complete: level = item.level if isinstance(item, SectionHeaderItem) else 1 heading_by_level[level] = item.text @@ -173,9 +164,9 @@ def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[BaseChunk]: ] or None else: continue - c = Chunk( + c = DocChunk( text=text, - meta=ChunkMeta( + meta=DocMeta( doc_items=[item], headings=[heading_by_level[k] for k in sorted(heading_by_level)] or None, @@ -185,9 +176,9 @@ def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[BaseChunk]: yield c if self.merge_list_items and list_items: # need to yield - yield Chunk( + yield DocChunk( text=self.delim.join([i.text for i in list_items]), - meta=ChunkMeta( + meta=DocMeta( doc_items=list_items, headings=[heading_by_level[k] for k in sorted(heading_by_level)] or None, diff --git a/docling_core/transforms/id_generator/__init__.py b/docling_core/transforms/id_generator/__init__.py deleted file mode 100644 index 8eb8d3c8..00000000 --- a/docling_core/transforms/id_generator/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -# -# Copyright IBM Corp. 2024 - 2024 -# SPDX-License-Identifier: MIT -# - -"""Define the ID generator types.""" - -from docling_core.transforms.id_generator.base import BaseIDGenerator # noqa -from docling_core.transforms.id_generator.uuid_generator import UUIDGenerator # noqa diff --git a/docling_core/transforms/id_generator/base.py b/docling_core/transforms/id_generator/base.py deleted file mode 100644 index 74cc1533..00000000 --- a/docling_core/transforms/id_generator/base.py +++ /dev/null @@ -1,30 +0,0 @@ -# -# Copyright IBM Corp. 2024 - 2024 -# SPDX-License-Identifier: MIT -# - -"""Base document ID generator module.""" - -from abc import ABC, abstractmethod -from typing import Any - -from docling_core.types.doc import DoclingDocument as DLDocument - - -class BaseIDGenerator(ABC): - """Document ID generator base class.""" - - @abstractmethod - def generate_id(self, doc: DLDocument, *args: Any, **kwargs: Any) -> str: - """Generate an ID for the given document. - - Args: - doc (DLDocument): document to generate ID for - - Raises: - NotImplementedError: in this abstract implementation - - Returns: - str: the generated ID - """ - raise NotImplementedError() diff --git a/docling_core/transforms/id_generator/uuid_generator.py b/docling_core/transforms/id_generator/uuid_generator.py deleted file mode 100644 index 8e8dcd90..00000000 --- a/docling_core/transforms/id_generator/uuid_generator.py +++ /dev/null @@ -1,34 +0,0 @@ -# -# Copyright IBM Corp. 2024 - 2024 -# SPDX-License-Identifier: MIT -# - -"""UUID-based ID generator module.""" - -from random import Random -from typing import Annotated, Any, Optional -from uuid import UUID - -from pydantic import BaseModel, Field - -from docling_core.transforms.id_generator import BaseIDGenerator -from docling_core.types.doc import DoclingDocument as DLDocument - - -class UUIDGenerator(BaseModel, BaseIDGenerator): - """UUID-based ID generator class.""" - - seed: Optional[int] = None - uuid_version: Annotated[int, Field(strict=True, ge=1, le=5)] = 4 - - def generate_id(self, doc: DLDocument, *args: Any, **kwargs: Any) -> str: - """Generate an ID for the given document. - - Args: - doc (DLDocument): document to generate ID for - - Returns: - str: the generated ID - """ - rd = Random(x=self.seed) - return str(UUID(int=rd.getrandbits(128), version=self.uuid_version)) diff --git a/docling_core/transforms/metadata_extractor/__init__.py b/docling_core/transforms/metadata_extractor/__init__.py deleted file mode 100644 index fc44f7b2..00000000 --- a/docling_core/transforms/metadata_extractor/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -# -# Copyright IBM Corp. 2024 - 2024 -# SPDX-License-Identifier: MIT -# - -"""Define the metadata extractor types.""" - -from docling_core.transforms.metadata_extractor.base import ( # noqa - BaseMetadataExtractor, -) diff --git a/docling_core/transforms/metadata_extractor/base.py b/docling_core/transforms/metadata_extractor/base.py deleted file mode 100644 index 6e846785..00000000 --- a/docling_core/transforms/metadata_extractor/base.py +++ /dev/null @@ -1,59 +0,0 @@ -# -# Copyright IBM Corp. 2024 - 2024 -# SPDX-License-Identifier: MIT -# - -"""Base metadata extractor module.""" - - -from abc import ABC, abstractmethod -from typing import Any - -from pydantic import BaseModel - -from docling_core.types.doc import DoclingDocument as DLDocument - - -class BaseMetadataExtractor(BaseModel, ABC): - """Metadata extractor base class.""" - - @abstractmethod - def get_metadata( - self, doc: DLDocument, *args: Any, **kwargs: Any - ) -> dict[str, Any]: - """Extract metadata for the given document. - - Args: - doc (DLDocument): document to extract metadata for - - Raises: - NotImplementedError: in this abstract implementation - - Returns: - dict[str, Any]: the extracted metadata - """ - raise NotImplementedError() - - @abstractmethod - def get_excluded_embed_metadata_keys(self) -> list[str]: - """Get metadata keys to exclude from embedding. - - Raises: - NotImplementedError: in this abstract implementation - - Returns: - list[str]: the metadata to exclude - """ - raise NotImplementedError() - - @abstractmethod - def get_excluded_llm_metadata_keys(self) -> list[str]: - """Get metadata keys to exclude from LLM generation. - - Raises: - NotImplementedError: in this abstract implementation - - Returns: - list[str]: the metadata to exclude - """ - raise NotImplementedError() diff --git a/test/test_hierarchical_chunker.py b/test/test_hierarchical_chunker.py index 7bd6f4c3..38504e79 100644 --- a/test/test_hierarchical_chunker.py +++ b/test/test_hierarchical_chunker.py @@ -6,7 +6,7 @@ import json from docling_core.transforms.chunker import HierarchicalChunker -from docling_core.transforms.chunker.hierarchical_chunker import Chunk +from docling_core.transforms.chunker.hierarchical_chunker import DocChunk from docling_core.types.doc import DoclingDocument as DLDocument @@ -18,7 +18,9 @@ def test_chunk_merge_list_items(): merge_list_items=True, ) chunks = chunker.chunk(dl_doc=dl_doc) - act_data = dict(root=[Chunk.model_validate(n).export_json_dict() for n in chunks]) + act_data = dict( + root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] + ) with open("test/data/chunker/0_out_chunks.json") as f: exp_data = json.load(fp=f) assert exp_data == act_data @@ -32,7 +34,9 @@ def test_chunk_no_merge_list_items(): merge_list_items=False, ) chunks = chunker.chunk(dl_doc=dl_doc) - act_data = dict(root=[Chunk.model_validate(n).export_json_dict() for n in chunks]) + act_data = dict( + root=[DocChunk.model_validate(n).export_json_dict() for n in chunks] + ) with open("test/data/chunker/1_out_chunks.json") as f: exp_data = json.load(fp=f) assert exp_data == act_data From 599d0b24f5a879b2d768c849baadbe8e65565fd5 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:07:51 +0200 Subject: [PATCH 4/5] revert unnecessary doc module change Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- docling_core/types/doc/document.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py index 95e81d39..3ff9b421 100644 --- a/docling_core/types/doc/document.py +++ b/docling_core/types/doc/document.py @@ -29,8 +29,7 @@ from docling_core.types.legacy_doc.tokens import DocumentToken Uint64 = typing.Annotated[int, Field(ge=0, le=(2**64 - 1))] -MAX_LEVEL_NR = 100 -LevelNumber = typing.Annotated[int, Field(ge=1, le=MAX_LEVEL_NR)] +LevelNumber = typing.Annotated[int, Field(ge=1, le=100)] CURRENT_VERSION: Final = "1.0.0" DEFAULT_EXPORT_LABELS = { From aab8f536d380d208c026036e56a3ea0617a736e7 Mon Sep 17 00:00:00 2001 From: Panos Vagenas <35837085+vagenas@users.noreply.github.com> Date: Wed, 16 Oct 2024 14:18:22 +0200 Subject: [PATCH 5/5] align test data with upstream changes Signed-off-by: Panos Vagenas <35837085+vagenas@users.noreply.github.com> --- test/data/chunker/0_inp_dl_doc.json | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/test/data/chunker/0_inp_dl_doc.json b/test/data/chunker/0_inp_dl_doc.json index fcd336f8..cff8414e 100644 --- a/test/data/chunker/0_inp_dl_doc.json +++ b/test/data/chunker/0_inp_dl_doc.json @@ -4444,7 +4444,7 @@ "captions": [], "references": [], "footnotes": [], - "data": {} + "annotations": [] }, { "self_ref": "#/pictures/1", @@ -4476,7 +4476,7 @@ ], "references": [], "footnotes": [], - "data": {} + "annotations": [] }, { "self_ref": "#/pictures/2", @@ -4508,7 +4508,7 @@ ], "references": [], "footnotes": [], - "data": {} + "annotations": [] }, { "self_ref": "#/pictures/3", @@ -4536,7 +4536,7 @@ "captions": [], "references": [], "footnotes": [], - "data": {} + "annotations": [] }, { "self_ref": "#/pictures/4", @@ -4564,7 +4564,7 @@ "captions": [], "references": [], "footnotes": [], - "data": {} + "annotations": [] }, { "self_ref": "#/pictures/5", @@ -4592,7 +4592,7 @@ "captions": [], "references": [], "footnotes": [], - "data": {} + "annotations": [] }, { "self_ref": "#/pictures/6", @@ -4624,7 +4624,7 @@ ], "references": [], "footnotes": [], - "data": {} + "annotations": [] }, { "self_ref": "#/pictures/7", @@ -4656,7 +4656,7 @@ ], "references": [], "footnotes": [], - "data": {} + "annotations": [] }, { "self_ref": "#/pictures/8", @@ -4684,7 +4684,7 @@ "captions": [], "references": [], "footnotes": [], - "data": {} + "annotations": [] }, { "self_ref": "#/pictures/9", @@ -4712,7 +4712,7 @@ "captions": [], "references": [], "footnotes": [], - "data": {} + "annotations": [] } ], "tables": [