Skip to content
This repository was archived by the owner on Mar 6, 2026. It is now read-only.

Blank Documents cause hOCR export to fail #160

@holtskinner

Description

@holtskinner

Reported in Cloud Support case 46457117

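When using hOCR export on a blank document, the export fails with `IndexError: list index out of range`, raised in `_get_bounding_box` (`hocr.py`) when it indexes `normalized_vertices[0]` on the page layout's bounding poly. The full error is: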

ERROR 2023-08-14T23:00:32.440935Z Exception on 
/v1/projects/expinteligente-pro-mx/processors/68274a05ce445645:ocr [POST] 
Traceback (most recent call last): File 
"/usr/local/lib/python3.8/site-packages/flask/app.py", line 1516, in 
full_dispatch_request rv = self.dispatch_request() File 
"/usr/local/lib/python3.8/site-packages/flask/app.py", line 1502, in 
dispatch_request return 
self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args) File 
"/usr/local/lib/python3.8/site-packages/flask_restful/__init__.py", line 
467, in wrapper resp = resource(*args, **kwargs) File 
"/usr/local/lib/python3.8/site-packages/flask/views.py", line 84, in view 
return current_app.ensure_sync(self.dispatch_request)(*args, **kwargs) File 
"/usr/local/lib/python3.8/site-packages/flask_restful/__init__.py", line 
582, in dispatch_request resp = meth(*args, **kwargs) File 
"/app/controller/ocr.py", line 29, in post service_response = 
service.process_document(project_id, processor_id, request.get_data(), 
request.headers.get(_HEADER_CONTENT_TYPE)) File 
"/usr/local/lib/python3.8/site-packages/document_ai/helper/decorators.py", 
line 45, in wrapper result = fn(*args, **kwargs) File 
"/app/service/ocr/ocr_service.py", line 52, in process_document str_hocr = 
wrapped_document.to_hocr(filename="hocr") File 
"/usr/local/lib/python3.8/site-packages/google/cloud/documentai_toolbox/wrappers/document.py", 
line 767, in to_hocr hocr = _Hocr(documentai_pages=self.pages, 
documentai_text=self.text,filename=filename) File "<string>", line 6, in 
__init__ File 
"/usr/local/lib/python3.8/site-packages/google/cloud/documentai_toolbox/wrappers/hocr.py", 
line 200, in __post_init__ self.hocr_pages = 
_load_pages(pages=(self.documentai_pages), 
document_text=(self.documentai_text),filename=self.filename) File 
"/usr/local/lib/python3.8/site-packages/google/cloud/documentai_toolbox/wrappers/hocr.py", 
line 69, in _load_pages 
res_pages.append(_HocrPage(documentai_page=page.documentai_page, 
document_text=document_text, filename=filename)) File "<string>", line 6, 
in __init__ File 
"/usr/local/lib/python3.8/site-packages/google/cloud/documentai_toolbox/wrappers/hocr.py", 
line 176, in __post_init__ self.bounding_box = 
_get_bounding_box(element_with_layout=(self.documentai_page), 
dimensions=(self.documentai_page.dimension)) File 
"/usr/local/lib/python3.8/site-packages/google/cloud/documentai_toolbox/wrappers/hocr.py", 
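line 13, in _get_bounding_box min_x, min_y = 
element_with_layout.layout.bounding_poly.normalized_vertices[0].x, 
element_with_layout.layout.bounding_poly.normalized_vertices[0].y File 
"/usr/local/lib/python3.8/site-packages/proto/marshal/collections/repeated.py", 
line 125, in __getitem__ return self._marshal.to_python(self._pb_type, 
self.pb[key]) IndexError: list index out of range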

 {

  "textPayload": "Exception on 
/v1/projects/expinteligente-pro-mx/processors/68274a05ce445645:ocr 
[POST]
Traceback (most recent call last):
 File 
"/usr/local/lib/python3.8/site-packages/flask/app.py", line 1516, in 
full_dispatch_request
  rv = self.dispatch_request()
 File 
"/usr/local/lib/python3.8/site-packages/flask/app.py", line 1502, in 
dispatch_request
  return 
self.ensure_sync(self.view_functions[rule.endpoint])(**req.view_args)
 File 
"/usr/local/lib/python3.8/site-packages/flask_restful/__init__.py", line 
467, in wrapper
  resp = resource(*args, **kwargs)
 File 
"/usr/local/lib/python3.8/site-packages/flask/views.py", line 84, in 
view
  return current_app.ensure_sync(self.dispatch_request)(*args, 
**kwargs)
 File 
"/usr/local/lib/python3.8/site-packages/flask_restful/__init__.py", line 
582, in dispatch_request
  resp = meth(*args, **kwargs)
 File 
"/app/controller/ocr.py", line 29, in post
  service_response = 
service.process_document(project_id, processor_id, request.get_data(), 
request.headers.get(_HEADER_CONTENT_TYPE))
 File 
"/usr/local/lib/python3.8/site-packages/document_ai/helper/decorators.py", 
line 45, in wrapper
  result = fn(*args, **kwargs)
 File 
"/app/service/ocr/ocr_service.py", line 52, in 
process_document
  str_hocr = 
wrapped_document.to_hocr(filename="hocr")
 File 
"/usr/local/lib/python3.8/site-packages/google/cloud/documentai_toolbox/wrappers/document.py", 
line 767, in to_hocr
  hocr = _Hocr(documentai_pages=self.pages, 
documentai_text=self.text,filename=filename)
 File "<string>", line 6, 
in __init__
 File 
"/usr/local/lib/python3.8/site-packages/google/cloud/documentai_toolbox/wrappers/hocr.py", 
line 200, in __post_init__
  self.hocr_pages = 
_load_pages(pages=(self.documentai_pages), 
document_text=(self.documentai_text),filename=self.filename)
 File 
"/usr/local/lib/python3.8/site-packages/google/cloud/documentai_toolbox/wrappers/hocr.py", 
line 69, in 
_load_pages
  res_pages.append(_HocrPage(documentai_page=page.documentai_page, 
document_text=document_text, filename=filename))
 File "<string>", line 
6, in __init__
 File 
"/usr/local/lib/python3.8/site-packages/google/cloud/documentai_toolbox/wrappers/hocr.py", 
line 176, in __post_init__
  self.bounding_box = 
_get_bounding_box(element_with_layout=(self.documentai_page), 
dimensions=(self.documentai_page.dimension))
 File 
"/usr/local/lib/python3.8/site-packages/google/cloud/documentai_toolbox/wrappers/hocr.py", 
line 13, in _get_bounding_box
  min_x, min_y = 
element_with_layout.layout.bounding_poly.normalized_vertices[0].x, 
element_with_layout.layout.bounding_poly.normalized_vertices[0].y
 File 
"/usr/local/lib/python3.8/site-packages/proto/marshal/collections/repeated.py", 
line 125, in __getitem__
  return self._marshal.to_python(self._pb_type, 
self.pb[key])
IndexError: list index out of range",

  "insertId": "y4nox8fac86tf",

  "resource": {

   "type": "cloud_run_revision",

   "labels": {

    "configuration_name": "",

    "project_id": "ieproxy-pro-xg",

    "revision_name": "",

    "service_name": "",

    "location": ""

   }

  },

  "timestamp": "2023-08-14T23:00:32.440935Z",

  "severity": "ERROR",

  "labels": {

   "configuration_name": "documentai-gateway-02",

   "revision_name": "documentai-gateway-02-00015-df2",

   "version_id": "0.4.0",

   "service_name": "documentai-gateway-02",

   "location": "us-central1",

   "project_id": ""

  },

  "logName": "projects/ieproxy-pro-xg/logs/intelligent-extract",

  "trace": 
"projects/ieproxy-pro-xg/traces/l2le-1ab84559-b8b0-4e0b-90e0-44f1700302f1",
  "receiveTimestamp": "2023-08-14T23:00:32.526769271Z"
 }

Code

wrapped_document = 
documentai_document_wrapper.Document.from_documentai_document(documentai_document=result.document)

return wrapped_document.export_hocr_str(title="hocr")

Metadata

Assignees

Labels

No labels

Type

No type

Projects

No projects

Milestone

No milestone

Relationships

None yet

Development

No branches or pull requests

Issue actions