Skip to content

Commit

Permalink
fix: Add handling for documents missing all layout elements. (#161)
Browse files Browse the repository at this point in the history
* fix: Add handling for documents missing all layout elements.

- Bounding boxes will show up as 0,0,0,0
- Fixes #160

* docs: Add information about return value of `get_bounding_box()`

* fix: Addressed comment and moved if statement
  • Loading branch information
holtskinner committed Sep 5, 2023
1 parent a702231 commit 1ac6f5e
Show file tree
Hide file tree
Showing 3 changed files with 16 additions and 0 deletions.
5 changes: 5 additions & 0 deletions google/cloud/documentai_toolbox/utilities/docai_utilities.py
Expand Up @@ -35,14 +35,19 @@ def get_bounding_box(
Returns:
Tuple[int, int, int, int]:
Bounding box coordinates in order (top, left, bottom, right).
Returns `0, 0, 0, 0` if `bounding_poly.normalized_vertices` is empty.
"""
if not bounding_poly.normalized_vertices:
return 0, 0, 0, 0

vertices = [
(
int(vertex.x * page_dimension.width + 0.5),
int(vertex.y * page_dimension.height + 0.5),
)
for vertex in bounding_poly.normalized_vertices
]

top, left = vertices[0]
bottom, right = vertices[2]
return top, left, bottom, right
1 change: 1 addition & 0 deletions tests/unit/resources/blank_document.json

Large diffs are not rendered by default.

10 changes: 10 additions & 0 deletions tests/unit/test_document.py
Expand Up @@ -661,6 +661,16 @@ def test_export_hocr_str():
assert actual_hocr == expected


def test_export_hocr_str_with_blank_document():
    """Check that hOCR export returns a truthy result for the blank-document fixture."""
    blank_fixture_path = "tests/unit/resources/blank_document.json"
    blank_doc = document.Document.from_document_path(
        document_path=blank_fixture_path
    )

    hocr_output = blank_doc.export_hocr_str(title="hocr_blank")

    # Only truthiness is asserted: the export must complete and produce
    # non-empty output; the exact hOCR markup is not compared here.
    assert hocr_output


def test_document_to_merged_documentai_document(get_bytes_multiple_files_mock):
wrapped_document = document.Document.from_gcs(
gcs_bucket_name="test-directory", gcs_prefix="documentai/output/123456789/1/"
Expand Down

0 comments on commit 1ac6f5e

Please sign in to comment.