docling-project · PeterStaar-IBM · Feb 14, 2025 · Feb 14, 2025 · Feb 14, 2025
diff --git a/docling_core/transforms/chunker/hierarchical_chunker.py b/docling_core/transforms/chunker/hierarchical_chunker.py
@@ -19,6 +19,7 @@
 from docling_core.transforms.chunker import BaseChunk, BaseChunker, BaseMeta
 from docling_core.types import DoclingDocument as DLDocument
 from docling_core.types.doc.document import (
+    CodeItem,
     DocItem,
     DocumentOrigin,
     LevelNumber,
@@ -199,8 +200,10 @@ def chunk(self, dl_doc: DLDocument, **kwargs: Any) -> Iterator[BaseChunk]:
                         heading_by_level.pop(k, None)
                     continue
 
-                if isinstance(item, TextItem) or (
-                    (not self.merge_list_items) and isinstance(item, ListItem)
+                if (
+                    isinstance(item, TextItem)
+                    or ((not self.merge_list_items) and isinstance(item, ListItem))
+                    or isinstance(item, CodeItem)
                 ):
                     text = item.text
                 elif isinstance(item, TableItem):

diff --git a/docling_core/types/doc/document.py b/docling_core/types/doc/document.py
@@ -677,51 +677,6 @@ def export_to_document_tokens(
         return body
 
 
-class CodeItem(TextItem):
-    """CodeItem."""
-
-    label: typing.Literal[DocItemLabel.CODE] = (
-        DocItemLabel.CODE  # type: ignore[assignment]
-    )
-    code_language: CodeLanguageLabel = CodeLanguageLabel.UNKNOWN
-
-    def export_to_document_tokens(
-        self,
-        doc: "DoclingDocument",
-        new_line: str = "",
-        xsize: int = 500,
-        ysize: int = 500,
-        add_location: bool = True,
-        add_content: bool = True,
-    ):
-        r"""Export text element to document tokens format.
-
-        :param doc: "DoclingDocument":
-        :param new_line: str (Default value = "")
-        :param xsize: int:  (Default value = 500)
-        :param ysize: int:  (Default value = 500)
-        :param add_location: bool:  (Default value = True)
-        :param add_content: bool:  (Default value = True)
-
-        """
-        body = f"{DocumentToken.BEG_CODE.value}{new_line}"
-
-        if add_location:
-            body += self.get_location_tokens(
-                doc=doc,
-                new_line=new_line,
-                xsize=xsize,
-                ysize=ysize,
-            )
-
-        if add_content and self.text is not None:
-            body += f"<_{self.code_language.value}_>{self.text}{new_line}"
-
-        body += f"{DocumentToken.END_CODE.value}\n"
-
-        return body
-
-
 class SectionHeaderItem(TextItem):
     """SectionItem."""
 
@@ -812,6 +767,53 @@ def get_image(self, doc: "DoclingDocument") -> Optional[PILImage.Image]:
         return super().get_image(doc=doc)
 
 
+class CodeItem(FloatingItem):
+    """Code item: a floating item holding code text and its language label."""
+
+    label: typing.Literal[DocItemLabel.CODE] = (
+        DocItemLabel.CODE  # type: ignore[assignment]
+    )
+    orig: str  # untreated representation
+    text: str  # sanitized representation
+    code_language: CodeLanguageLabel = CodeLanguageLabel.UNKNOWN
+
+    def export_to_document_tokens(
+        self,
+        doc: "DoclingDocument",
+        new_line: str = "",
+        xsize: int = 500,
+        ysize: int = 500,
+        add_location: bool = True,
+        add_content: bool = True,
+    ):
+        r"""Export code element to document tokens format.
+
+        :param doc: "DoclingDocument": document forwarded to get_location_tokens
+        :param new_line: str: appended after tag and text (Default value = "")
+        :param xsize: int: forwarded to get_location_tokens (Default value = 500)
+        :param ysize: int: forwarded to get_location_tokens (Default value = 500)
+        :param add_location: bool: emit location tokens (Default value = True)
+        :param add_content: bool: emit language tag and text (Default value = True)
+
+        """
+        body = f"{DocumentToken.BEG_CODE.value}{new_line}"
+
+        if add_location:
+            body += self.get_location_tokens(
+                doc=doc,
+                new_line=new_line,
+                xsize=xsize,
+                ysize=ysize,
+            )
+        # Content is emitted as a "<_{language}_>" tag followed by the text.
+        if add_content and self.text is not None:
+            body += f"<_{self.code_language.value}_>{self.text}{new_line}"
+
+        body += f"{DocumentToken.END_CODE.value}\n"
+
+        return body
+
+
 class PictureItem(FloatingItem):
     """PictureItem."""
 
@@ -1763,6 +1765,7 @@ def add_code(
         text: str,
         code_language: Optional[CodeLanguageLabel] = None,
         orig: Optional[str] = None,
+        caption: Optional[Union[TextItem, RefItem]] = None,
         prov: Optional[ProvenanceItem] = None,
         parent: Optional[NodeItem] = None,
         content_layer: Optional[ContentLayer] = None,
@@ -1772,6 +1775,8 @@ def add_code(
         :param text: str:
         :param code_language: Optional[str]: (Default value = None)
         :param orig: Optional[str]:  (Default value = None)
+        :param caption: Optional[Union[TextItem, RefItem]]:
+            (Default value = None)
         :param prov: Optional[ProvenanceItem]:  (Default value = None)
         :param parent: Optional[NodeItem]:  (Default value = None)
         """
@@ -1795,6 +1800,8 @@ def add_code(
             code_item.content_layer = content_layer
         if prov:
             code_item.prov.append(prov)
+        if caption:
+            code_item.captions.append(caption.get_ref())
 
         self.texts.append(code_item)
         parent.children.append(RefItem(cref=cref))

diff --git a/docs/DoclingDocument.json b/docs/DoclingDocument.json
@@ -208,6 +208,41 @@
           "title": "Prov",
           "type": "array"
         },
+        "captions": {
+          "default": [],
+          "items": {
+            "$ref": "#/$defs/RefItem"
+          },
+          "title": "Captions",
+          "type": "array"
+        },
+        "references": {
+          "default": [],
+          "items": {
+            "$ref": "#/$defs/RefItem"
+          },
+          "title": "References",
+          "type": "array"
+        },
+        "footnotes": {
+          "default": [],
+          "items": {
+            "$ref": "#/$defs/RefItem"
+          },
+          "title": "Footnotes",
+          "type": "array"
+        },
+        "image": {
+          "anyOf": [
+            {
+              "$ref": "#/$defs/ImageRef"
+            },
+            {
+              "type": "null"
+            }
+          ],
+          "default": null
+        },
         "orig": {
           "title": "Orig",
           "type": "string"

diff --git a/test/data/docling_document/unit/CodeItem.yaml b/test/data/docling_document/unit/CodeItem.yaml
@@ -1,4 +1,8 @@
 children: []
+captions: []
+footnotes: []
+references: []
+image: null
 code_language: Python
 content_layer: body
 label: code