Skip to content

Commit

Permalink
Add post init for formfields
Browse files Browse the repository at this point in the history
  • Loading branch information
holtskinner committed May 3, 2023
1 parent 11bcee8 commit b5e2023
Show file tree
Hide file tree
Showing 2 changed files with 28 additions and 55 deletions.
54 changes: 18 additions & 36 deletions google/cloud/documentai_toolbox/wrappers/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,15 +154,27 @@ class FormField:
Attributes:
documentai_formfield (google.cloud.documentai.Document.Page.FormField):
Required. The original google.cloud.documentai.Document.Page.FormField object.
text (str):
Required. UTF-8 encoded text in reading order from the document.
field_name (str):
Required. The form field name
field_value (str):
Required. The form field value
"""

documentai_formfield: dataclasses.InitVar[documentai.Document.Page.FormField]
field_name: str
field_value: str
text: dataclasses.InitVar[str]

field_name: str = dataclasses.field(init=False)
field_value: str = dataclasses.field(init=False)

def __post_init__(self, documentai_formfield, text):
self.field_name = _trim_text(
_text_from_layout(documentai_formfield.field_name, text)
)
self.field_value = _trim_text(
_text_from_layout(documentai_formfield.field_value, text)
)


def _text_from_layout(layout: documentai.Document.Page.Layout, text: str) -> str:
Expand Down Expand Up @@ -282,37 +294,6 @@ def _trim_text(text: str) -> str:
return text.strip().replace("\n", " ")


def _get_form_fields(
    form_fields: List[documentai.Document.Page.FormField], text: str
) -> List[FormField]:
    r"""Wraps every Document AI form field in a FormField.

    Args:
        form_fields (List[documentai.Document.Page.FormField]):
            Required. The documentai.Document.Page.FormField objects
            to wrap.
        text (str):
            Required. UTF-8 encoded text in reading order
            from the document.

    Returns:
        List[FormField]:
            One FormField per input form field, in input order.
    """

    def _wrap(docai_field) -> FormField:
        # Name and value are each resolved through _text_from_layout
        # against the document text and then normalised by _trim_text.
        name = _trim_text(_text_from_layout(docai_field.field_name, text))
        value = _trim_text(_text_from_layout(docai_field.field_value, text))
        return FormField(
            documentai_formfield=docai_field,
            field_name=name,
            field_value=value,
        )

    return [_wrap(docai_field) for docai_field in form_fields]


def _table_rows_from_documentai_table_rows(
table_rows: List[documentai.Document.Page.Table.TableRow], text: str
) -> List[List[str]]:
Expand Down Expand Up @@ -389,9 +370,10 @@ class Page:

def __post_init__(self, documentai_page, text):
self.page_number = int(documentai_page.page_number)
self.form_fields = _get_form_fields(
form_fields=documentai_page.form_fields, text=text
)
self.form_fields = [
FormField(documentai_formfield=form_field, text=text)
for form_field in documentai_page.form_fields
]
self.lines = _get_lines(lines=documentai_page.lines, text=text)
self.paragraphs = _get_paragraphs(
paragraphs=documentai_page.paragraphs, text=text
Expand Down
29 changes: 10 additions & 19 deletions tests/unit/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -187,30 +187,17 @@ def test_get_lines(docproto):
assert lines[36].text == "Supplies used for Project Q.\n"


def test_get_form_fields(docproto_form_parser):
    """Checks that _get_form_fields wraps the first page's form fields."""
    first_page = docproto_form_parser.pages[0]
    document_text = docproto_form_parser.text

    wrapped_fields = page._get_form_fields(
        form_fields=first_page.form_fields, text=document_text
    )

    # Check the count first so a short result fails on the length assertion
    # rather than on the index lookup below.
    assert len(wrapped_fields) == 17

    occupation_field = wrapped_fields[4]
    assert occupation_field.field_name == "Occupation:"
    assert occupation_field.field_value == "Software Engineer"


# Class init Tests


def test_FormField():
docai_form_field = documentai.Document.Page.FormField()
def test_FormField(docproto_form_parser):
documentai_formfield = docproto_form_parser.pages[0].form_fields[4]
form_field = page.FormField(
documentai_formfield=docai_form_field,
field_name="Name:",
field_value="Sally Walker",
documentai_formfield=documentai_formfield, text=docproto_form_parser.text
)
assert form_field.field_name == "Name:"
assert form_field.field_value == "Sally Walker"

assert form_field.field_name == "Occupation:"
assert form_field.field_value == "Software Engineer"


def test_Block():
Expand Down Expand Up @@ -253,6 +240,10 @@ def test_Page(docproto):
assert len(wrapped_page.lines) == 37
assert len(wrapped_page.paragraphs) == 31
assert len(wrapped_page.blocks) == 31
assert len(wrapped_page.form_fields) == 13

assert wrapped_page.lines[0].text == "Invoice\n"
assert wrapped_page.paragraphs[30].text == "Supplies used for Project Q.\n"
assert wrapped_page.blocks[30].text == "Supplies used for Project Q.\n"
assert wrapped_page.form_fields[0].field_name == "BALANCE DUE"
assert wrapped_page.form_fields[0].field_value == "$2140.00"

0 comments on commit b5e2023

Please sign in to comment.