Skip to content
This repository has been archived by the owner on Sep 20, 2023. It is now read-only.

Commit

Permalink
feat: added sharding_config field in DocumentOutputConfig.GcsOutputCo…
Browse files Browse the repository at this point in the history
…nfig in document_io.proto (#433)

* feat: added sharding_config field in DocumentOutputConfig.GcsOutputConfig in document_io.proto
feat: added process_options field in ProcessRequest in document_processor_service.proto
feat: added sample_document_uris field in ProcessorType in processor_type.proto

PiperOrigin-RevId: 495360288

Source-Link: googleapis/googleapis@5f39f46

Source-Link: googleapis/googleapis-gen@8520d57
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiODUyMGQ1NzA5M2QzNjBhZmVjMTk0Y2QwMjliNzEzMjRlNTk3ZjYyNiJ9

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* feat: added sharding_config field in DocumentOutputConfig.GcsOutputConfig in document_io.proto
feat: added process_options field in ProcessRequest in document_processor_service.proto
feat: added sample_document_uris field in ProcessorType in processor_type.proto

PiperOrigin-RevId: 495363748

Source-Link: googleapis/googleapis@3dce9ff

Source-Link: googleapis/googleapis-gen@66f3518
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiNjZmMzUxODIxN2YwYzA0YTQ3ZmFlZmViMzMyMmZmMjI2MTA2Yjg1NyJ9

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
  • Loading branch information
gcf-owl-bot[bot] and gcf-owl-bot[bot] committed Dec 14, 2022
1 parent 9fcaff6 commit d923e53
Show file tree
Hide file tree
Showing 14 changed files with 97 additions and 25 deletions.
19 changes: 10 additions & 9 deletions google/cloud/documentai_v1/types/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class Document(proto.Message):
Optional. UTF-8 encoded text in reading order
from the document.
text_styles (MutableSequence[google.cloud.documentai_v1.types.Document.Style]):
Placeholder. Styles for the
Styles for the
[Document.text][google.cloud.documentai.v1.Document.text].
pages (MutableSequence[google.cloud.documentai_v1.types.Document.Page]):
Visual page layout for the
Expand Down Expand Up @@ -775,9 +775,9 @@ class FormField(proto.Message):
If the value is non-textual, this field represents the type.
Current valid values are:
- blank (this indicates the field_value is normal text)
- "unfilled_checkbox"
- "filled_checkbox".
- blank (this indicates the ``field_value`` is normal text)
- ``unfilled_checkbox``
- ``filled_checkbox``
corrected_key_text (str):
Created for Labeling UI to export key text. If corrections
were made to the text identified by the
Expand Down Expand Up @@ -1117,8 +1117,8 @@ class NormalizedValue(proto.Message):
For some entity types, one of respective
``structured_value`` fields may also be populated. Also not
all the types of ``structured_value`` will be normalized.
For example, some processors may not generate float or int
normalized text by default.
For example, some processors may not generate ``float`` or
``integer`` normalized text by default.
Below are sample formats mapped to structured values.
Expand Down Expand Up @@ -1323,9 +1323,10 @@ class PageRef(proto.Message):
page (int):
Required. Index into the
[Document.pages][google.cloud.documentai.v1.Document.pages]
element, for example using [Document.pages][page_refs.page]
to locate the related page element. This field is skipped
when its value is the default 0. See
element, for example using
``[Document.pages][page_refs.page]`` to locate the related
page element. This field is skipped when its value is the
default ``0``. See
https://developers.google.com/protocol-buffers/docs/proto3#json.
layout_type (google.cloud.documentai_v1.types.Document.PageAnchor.PageRef.LayoutType):
Optional. The type of the layout element that
Expand Down
2 changes: 1 addition & 1 deletion google/cloud/documentai_v1/types/document_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class EntityType(proto.Message):
following naming conventions:
- *use ``snake_casing``*
- name matching is case-insensitive
- name matching is case-sensitive
- Maximum 64 characters.
- Must start with a letter.
- Allowed characters: ASCII letters ``[a-z0-9_-]``. (For
Expand Down
7 changes: 7 additions & 0 deletions google/cloud/documentai_v1/types/processor_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ class ProcessorType(proto.Message):
access.
launch_stage (google.api.launch_stage_pb2.LaunchStage):
Launch stage of the processor type
sample_document_uris (MutableSequence[str]):
A set of Cloud Storage URIs of sample
documents for this processor.
"""

class LocationInfo(proto.Message):
Expand Down Expand Up @@ -92,6 +95,10 @@ class LocationInfo(proto.Message):
number=8,
enum=launch_stage_pb2.LaunchStage,
)
sample_document_uris: MutableSequence[str] = proto.RepeatedField(
proto.STRING,
number=9,
)


__all__ = tuple(sorted(__protobuf__.manifest))
4 changes: 4 additions & 0 deletions google/cloud/documentai_v1beta3/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
GcsDocument,
GcsDocuments,
GcsPrefix,
OcrConfig,
RawDocument,
)
from .types.document_processor_service import (
Expand Down Expand Up @@ -67,6 +68,7 @@
ListProcessorTypesResponse,
ListProcessorVersionsRequest,
ListProcessorVersionsResponse,
ProcessOptions,
ProcessRequest,
ProcessResponse,
ReviewDocumentOperationMetadata,
Expand Down Expand Up @@ -138,6 +140,8 @@
"ListProcessorsRequest",
"ListProcessorsResponse",
"NormalizedVertex",
"OcrConfig",
"ProcessOptions",
"ProcessRequest",
"ProcessResponse",
"Processor",
Expand Down
4 changes: 4 additions & 0 deletions google/cloud/documentai_v1beta3/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
GcsDocument,
GcsDocuments,
GcsPrefix,
OcrConfig,
RawDocument,
)
from .document_processor_service import (
Expand Down Expand Up @@ -58,6 +59,7 @@
ListProcessorTypesResponse,
ListProcessorVersionsRequest,
ListProcessorVersionsResponse,
ProcessOptions,
ProcessRequest,
ProcessResponse,
ReviewDocumentOperationMetadata,
Expand Down Expand Up @@ -88,6 +90,7 @@
"GcsDocument",
"GcsDocuments",
"GcsPrefix",
"OcrConfig",
"RawDocument",
"BatchProcessMetadata",
"BatchProcessRequest",
Expand Down Expand Up @@ -123,6 +126,7 @@
"ListProcessorTypesResponse",
"ListProcessorVersionsRequest",
"ListProcessorVersionsResponse",
"ProcessOptions",
"ProcessRequest",
"ProcessResponse",
"ReviewDocumentOperationMetadata",
Expand Down
19 changes: 10 additions & 9 deletions google/cloud/documentai_v1beta3/types/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ class Document(proto.Message):
Optional. UTF-8 encoded text in reading order
from the document.
text_styles (MutableSequence[google.cloud.documentai_v1beta3.types.Document.Style]):
Placeholder. Styles for the
Styles for the
[Document.text][google.cloud.documentai.v1beta3.Document.text].
pages (MutableSequence[google.cloud.documentai_v1beta3.types.Document.Page]):
Visual page layout for the
Expand Down Expand Up @@ -779,9 +779,9 @@ class FormField(proto.Message):
If the value is non-textual, this field represents the type.
Current valid values are:
- blank (this indicates the field_value is normal text)
- "unfilled_checkbox"
- "filled_checkbox".
- blank (this indicates the ``field_value`` is normal text)
- ``unfilled_checkbox``
- ``filled_checkbox``
corrected_key_text (str):
Created for Labeling UI to export key text. If corrections
were made to the text identified by the
Expand Down Expand Up @@ -1121,8 +1121,8 @@ class NormalizedValue(proto.Message):
For some entity types, one of respective
``structured_value`` fields may also be populated. Also not
all the types of ``structured_value`` will be normalized.
For example, some processors may not generate float or int
normalized text by default.
For example, some processors may not generate ``float`` or
``integer`` normalized text by default.
Below are sample formats mapped to structured values.
Expand Down Expand Up @@ -1327,9 +1327,10 @@ class PageRef(proto.Message):
page (int):
Required. Index into the
[Document.pages][google.cloud.documentai.v1beta3.Document.pages]
element, for example using [Document.pages][page_refs.page]
to locate the related page element. This field is skipped
when its value is the default 0. See
element, for example using
``[Document.pages][page_refs.page]`` to locate the related
page element. This field is skipped when its value is the
default ``0``. See
https://developers.google.com/protocol-buffers/docs/proto3#json.
layout_type (google.cloud.documentai_v1beta3.types.Document.PageAnchor.PageRef.LayoutType):
Optional. The type of the layout element that
Expand Down
17 changes: 17 additions & 0 deletions google/cloud/documentai_v1beta3/types/document_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"GcsPrefix",
"BatchDocumentsInputConfig",
"DocumentOutputConfig",
"OcrConfig",
},
)

Expand Down Expand Up @@ -218,4 +219,20 @@ class ShardingConfig(proto.Message):
)


class OcrConfig(proto.Message):
    r"""Configuration for Document OCR.

    Attributes:
        enable_native_pdf_parsing (bool):
            Turns on special handling for PDFs that already
            contain text information, which results in better
            text extraction quality for such PDF inputs.
    """

    enable_native_pdf_parsing: bool = proto.Field(proto.BOOL, number=3)


__all__ = tuple(sorted(__protobuf__.manifest))
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
__protobuf__ = proto.module(
package="google.cloud.documentai.v1beta3",
manifest={
"ProcessOptions",
"ProcessRequest",
"HumanReviewStatus",
"ProcessResponse",
Expand Down Expand Up @@ -83,6 +84,22 @@
)


class ProcessOptions(proto.Message):
    r"""Options for the Process API.

    Attributes:
        ocr_config (google.cloud.documentai_v1beta3.types.OcrConfig):
            Applicable only to the "Document OCR Processor";
            an error is returned if it is set on any other
            processor type.
    """

    ocr_config: document_io.OcrConfig = proto.Field(
        proto.MESSAGE, number=1, message=document_io.OcrConfig
    )


class ProcessRequest(proto.Message):
r"""Request message for the process document method.
Expand Down Expand Up @@ -125,6 +142,8 @@ class ProcessRequest(proto.Message):
document. Only supports top level document and pages field
so it must be in the form of ``{document_field_name}`` or
``pages.{page_field_name}``.
process_options (google.cloud.documentai_v1beta3.types.ProcessOptions):
Inference-time options for the process API
"""

inline_document: gcd_document.Document = proto.Field(
Expand Down Expand Up @@ -157,6 +176,11 @@ class ProcessRequest(proto.Message):
number=6,
message=field_mask_pb2.FieldMask,
)
process_options: "ProcessOptions" = proto.Field(
proto.MESSAGE,
number=7,
message="ProcessOptions",
)


class HumanReviewStatus(proto.Message):
Expand Down Expand Up @@ -259,6 +283,8 @@ class BatchProcessRequest(proto.Message):
skip_human_review (bool):
Whether Human Review feature should be
skipped for this request. Defaults to false.
process_options (google.cloud.documentai_v1beta3.types.ProcessOptions):
Inference-time options for the process API
"""

class BatchInputConfig(proto.Message):
Expand Down Expand Up @@ -326,6 +352,11 @@ class BatchOutputConfig(proto.Message):
proto.BOOL,
number=4,
)
process_options: "ProcessOptions" = proto.Field(
proto.MESSAGE,
number=7,
message="ProcessOptions",
)


class BatchProcessResponse(proto.Message):
Expand Down
2 changes: 1 addition & 1 deletion google/cloud/documentai_v1beta3/types/document_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ class EntityType(proto.Message):
following naming conventions:
- *use ``snake_casing``*
- name matching is case-insensitive
- name matching is case-sensitive
- Maximum 64 characters.
- Must start with a letter.
- Allowed characters: ASCII letters ``[a-z0-9_-]``. (For
Expand Down
7 changes: 7 additions & 0 deletions google/cloud/documentai_v1beta3/types/processor_type.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,9 @@ class ProcessorType(proto.Message):
access.
launch_stage (google.api.launch_stage_pb2.LaunchStage):
Launch stage of the processor type
sample_document_uris (MutableSequence[str]):
A set of Cloud Storage URIs of sample
documents for this processor.
"""

class LocationInfo(proto.Message):
Expand Down Expand Up @@ -92,6 +95,10 @@ class LocationInfo(proto.Message):
number=8,
enum=launch_stage_pb2.LaunchStage,
)
sample_document_uris: MutableSequence[str] = proto.RepeatedField(
proto.STRING,
number=9,
)


__all__ = tuple(sorted(__protobuf__.manifest))
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-documentai",
"version": "2.5.0"
"version": "0.1.0"
},
"snippets": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-documentai",
"version": "2.5.0"
"version": "0.1.0"
},
"snippets": [
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@
],
"language": "PYTHON",
"name": "google-cloud-documentai",
"version": "2.5.0"
"version": "0.1.0"
},
"snippets": [
{
Expand Down
4 changes: 2 additions & 2 deletions scripts/fixup_documentai_v1beta3_keywords.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def partition(
class documentaiCallTransformer(cst.CSTTransformer):
CTRL_PARAMS: Tuple[str] = ('retry', 'timeout', 'metadata')
METHOD_TO_PARAMS: Dict[str, Tuple[str]] = {
'batch_process_documents': ('name', 'input_configs', 'output_config', 'input_documents', 'document_output_config', 'skip_human_review', ),
'batch_process_documents': ('name', 'input_configs', 'output_config', 'input_documents', 'document_output_config', 'skip_human_review', 'process_options', ),
'create_processor': ('parent', 'processor', ),
'delete_processor': ('name', ),
'delete_processor_version': ('name', ),
Expand All @@ -55,7 +55,7 @@ class documentaiCallTransformer(cst.CSTTransformer):
'list_processors': ('parent', 'page_size', 'page_token', ),
'list_processor_types': ('parent', 'page_size', 'page_token', ),
'list_processor_versions': ('parent', 'page_size', 'page_token', ),
'process_document': ('name', 'inline_document', 'raw_document', 'document', 'skip_human_review', 'field_mask', ),
'process_document': ('name', 'inline_document', 'raw_document', 'document', 'skip_human_review', 'field_mask', 'process_options', ),
'review_document': ('human_review_config', 'inline_document', 'document', 'enable_schema_validation', 'priority', 'document_schema', ),
'set_default_processor_version': ('processor', 'default_processor_version', ),
'train_processor_version': ('parent', 'processor_version', 'document_schema', 'input_data', 'base_processor_version', ),
Expand Down

0 comments on commit d923e53

Please sign in to comment.