Skip to content

Commit

Permalink
feat: Update Toolbox for OCR 2.0 features (#171)
Browse files Browse the repository at this point in the history
* Add Symbol to Page wrapper

- Refactor classes to use a base class and greatly simplify the file.

* Add MathFormula

* Update Quickstart Sample and inline samples for creation methods

* Update tests to improve coverage

* Simplify code for `from_document_path()`

* Fix import statement order

* Update based on review comments

* Add Class attributes to docstring

* Add clarification in docstring about Symbols
  • Loading branch information
holtskinner committed Oct 18, 2023
1 parent 209a10a commit e4344c9
Show file tree
Hide file tree
Showing 6 changed files with 33,830 additions and 114 deletions.
26 changes: 15 additions & 11 deletions google/cloud/documentai_toolbox/wrappers/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,19 +390,19 @@ def from_document_path(
Document:
A document from local `document_path`.
"""
document_paths = [document_path]
document_paths = (
glob.glob(os.path.join(document_path, f"*{constants.JSON_EXTENSION}"))
if os.path.isdir(document_path)
else [document_path]
)

if os.path.isdir(document_path):
document_paths = glob.glob(
os.path.join(document_path, f"*{constants.JSON_EXTENSION}")
documents = [
documentai.Document.from_json(
open(file_path, "r", encoding="utf-8").read(),
ignore_unknown_fields=True,
)

documents = []
for file_path in document_paths:
with open(file_path, "r", encoding="utf-8") as f:
documents.append(
documentai.Document.from_json(f.read(), ignore_unknown_fields=True)
)
for file_path in document_paths
]

return cls(shards=documents)

Expand Down Expand Up @@ -474,10 +474,12 @@ def from_batch_process_metadata(
.. code-block:: python
from google.cloud import documentai
from google.cloud.documentai_toolbox import document
operation = client.batch_process_documents(request)
operation.result(timeout=timeout)
metadata = documentai.BatchProcessMetadata(operation.metadata)
wrapped_document = document.Document.from_batch_process_metadata(metadata)
Args:
metadata (documentai.BatchProcessMetadata):
Expand Down Expand Up @@ -507,9 +509,11 @@ def from_batch_process_operation(
.. code-block:: python
from google.cloud import documentai
from google.cloud.documentai_toolbox import document
operation = client.batch_process_documents(request)
operation_name = operation.operation.name
wrapped_document = document.Document.from_batch_process_operation(operation_name)
Args:
location (str):
Expand Down
Loading

0 comments on commit e4344c9

Please sign in to comment.