Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,7 @@
## 0.7.10-dev0

* fix: Skip ordering of elements coming from Chipper, as they are already ordered

## 0.7.9

* Allow table model to accept optional OCR tokens
Expand Down
2 changes: 1 addition & 1 deletion unstructured_inference/__version__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
__version__ = "0.7.9" # pragma: no cover
__version__ = "0.7.10-dev0" # pragma: no cover
19 changes: 15 additions & 4 deletions unstructured_inference/inference/layout.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@
from unstructured_inference.inference.pdf import get_images_from_pdf_element
from unstructured_inference.logger import logger
from unstructured_inference.models.base import get_model
from unstructured_inference.models.chipper import UnstructuredChipperModel
from unstructured_inference.models.detectron2onnx import (
UnstructuredDetectronONNXModel,
)
Expand Down Expand Up @@ -252,8 +253,13 @@ def get_elements_with_detection_model(

else:
merged_layout = inferred_layout

elements = self.get_elements_from_layout(cast(List[TextRegion], merged_layout))
# If the model is a chipper model, we don't want to order the
# elements, as they are already ordered
order_elements = not isinstance(self.detection_model, UnstructuredChipperModel)
elements = self.get_elements_from_layout(
cast(List[TextRegion], merged_layout),
order_elements=order_elements,
)

if self.analysis:
self.inferred_layout = inferred_layout
Expand All @@ -264,10 +270,15 @@ def get_elements_with_detection_model(

return elements

def get_elements_from_layout(self, layout: List[TextRegion]) -> List[LayoutElement]:
def get_elements_from_layout(
self,
layout: List[TextRegion],
order_elements: bool = True,
) -> List[LayoutElement]:
"""Uses the given Layout to separate the page text into elements, extracting the
text from the discovered layout blocks."""
layout = order_layout(layout)
if order_elements:
layout = order_layout(layout)
elements = [
get_element_from_block(
block=e,
Expand Down