Skip to content
This repository has been archived by the owner on Sep 20, 2023. It is now read-only.

Commit

Permalink
feat: added font_family to document.proto (#404)
Browse files Browse the repository at this point in the history
* feat: added font_family to document.proto
feat: added ImageQualityScores message to document.proto
feat: added PropertyMetadata and EntityTypeMetadata to document_schema.proto

PiperOrigin-RevId: 486975621

Source-Link: googleapis/googleapis@398c9f9

Source-Link: googleapis/googleapis-gen@7cd1f5f
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiN2NkMWY1ZjRlNDM1Nzc3Y2I4MjRhZjI2OGRjOGQzNzEzNDYxM2U2YSJ9

* 🦉 Updates from OwlBot post-processor

See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

* Update constraints-3.7.txt

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: Holt Skinner <13262395+holtskinner@users.noreply.github.com>
  • Loading branch information
3 people committed Nov 9, 2022
1 parent acf6324 commit 1038a05
Show file tree
Hide file tree
Showing 9 changed files with 165 additions and 44 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,11 @@
from google.protobuf import timestamp_pb2 # type: ignore

from google.cloud.documentai_v1.services.document_processor_service import pagers
from google.cloud.documentai_v1.types import document, document_processor_service
from google.cloud.documentai_v1.types import (
document,
document_processor_service,
document_schema,
)
from google.cloud.documentai_v1.types import processor
from google.cloud.documentai_v1.types import processor as gcd_processor
from google.cloud.documentai_v1.types import processor_type
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,11 @@
from google.protobuf import timestamp_pb2 # type: ignore

from google.cloud.documentai_v1.services.document_processor_service import pagers
from google.cloud.documentai_v1.types import document, document_processor_service
from google.cloud.documentai_v1.types import (
document,
document_processor_service,
document_schema,
)
from google.cloud.documentai_v1.types import processor
from google.cloud.documentai_v1.types import processor as gcd_processor
from google.cloud.documentai_v1.types import processor_type
Expand Down
48 changes: 32 additions & 16 deletions google/cloud/documentai_v1/types/barcode.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,25 +28,41 @@ class Barcode(proto.Message):
Attributes:
format_ (str):
Format of a barcode. The supported formats are: CODE_128:
Code 128 type. CODE_39: Code 39 type. CODE_93: Code 93 type.
CODABAR: Codabar type. DATA_MATRIX: 2D Data Matrix type.
ITF: ITF type. EAN_13: EAN-13 type. EAN_8: EAN-8 type.
QR_CODE: 2D QR code type. UPC_A: UPC-A type. UPC_E: UPC-E
type. PDF417: PDF417 type. AZTEC: 2D Aztec code type.
DATABAR: GS1 DataBar code type.
Format of a barcode. The supported formats are:
- ``CODE_128``: Code 128 type.
- ``CODE_39``: Code 39 type.
- ``CODE_93``: Code 93 type.
- ``CODABAR``: Codabar type.
- ``DATA_MATRIX``: 2D Data Matrix type.
- ``ITF``: ITF type.
- ``EAN_13``: EAN-13 type.
- ``EAN_8``: EAN-8 type.
- ``QR_CODE``: 2D QR code type.
- ``UPC_A``: UPC-A type.
- ``UPC_E``: UPC-E type.
- ``PDF417``: PDF417 type.
- ``AZTEC``: 2D Aztec code type.
- ``DATABAR``: GS1 DataBar code type.
value_format (str):
Value format describes the format of the value that a
barcode encodes. The supported formats are: CONTACT_INFO:
Contact information. EMAIL: Email address. ISBN: ISBN
identifier. PHONE: Phone number. PRODUCT: Product. SMS: SMS
message. TEXT: Text string. URL: URL address. WIFI: Wifi
information. GEO: Geo-localization. CALENDAR_EVENT: Calendar
event. DRIVER_LICENSE: Driver's license.
barcode encodes. The supported formats are:
- ``CONTACT_INFO``: Contact information.
- ``EMAIL``: Email address.
- ``ISBN``: ISBN identifier.
- ``PHONE``: Phone number.
- ``PRODUCT``: Product.
- ``SMS``: SMS message.
- ``TEXT``: Text string.
- ``URL``: URL address.
- ``WIFI``: Wifi information.
- ``GEO``: Geo-localization.
- ``CALENDAR_EVENT``: Calendar event.
- ``DRIVER_LICENSE``: Driver's license.
raw_value (str):
Raw value encoded in the barcode.
For example,
'MEBKM:TITLE:Google;URL:https://www.google.com;;'.
Raw value encoded in the barcode. For example:
``'MEBKM:TITLE:Google;URL:https://www.google.com;;'``.
"""

format_ = proto.Field(
Expand Down
99 changes: 85 additions & 14 deletions google/cloud/documentai_v1/types/document.py
Original file line number Diff line number Diff line change
Expand Up @@ -85,9 +85,10 @@ class Document(proto.Message):
[Document.entities][google.cloud.documentai.v1.Document.entities].
text_changes (Sequence[google.cloud.documentai_v1.types.Document.TextChange]):
Placeholder. A list of text corrections made to
[Document.text]. This is usually used for annotating
corrections to OCR mistakes. Text changes for a given
revision may not overlap with each other.
[Document.text][google.cloud.documentai.v1.Document.text].
This is usually used for annotating corrections to OCR
mistakes. Text changes for a given revision may not overlap
with each other.
shard_info (google.cloud.documentai_v1.types.Document.ShardInfo):
Information about the sharding if this
document is a sharded part of a larger document.
Expand Down Expand Up @@ -153,6 +154,9 @@ class Style(proto.Message):
https://www.w3schools.com/cssref/pr_text_text-decoration.asp
font_size (google.cloud.documentai_v1.types.Document.Style.FontSize):
Font size.
font_family (str):
Font family such as ``Arial``, ``Times New Roman``.
https://www.w3schools.com/cssref/pr_font_font-family.asp
"""

class FontSize(proto.Message):
Expand Down Expand Up @@ -207,6 +211,10 @@ class FontSize(proto.Message):
number=7,
message="Document.Style.FontSize",
)
font_family = proto.Field(
proto.STRING,
number=8,
)

class Page(proto.Message):
r"""A page in a [Document][google.cloud.documentai.v1.Document].
Expand Down Expand Up @@ -266,6 +274,8 @@ class Page(proto.Message):
page.
detected_barcodes (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedBarcode]):
A list of detected barcodes.
image_quality_scores (google.cloud.documentai_v1.types.Document.Page.ImageQualityScores):
Image Quality Scores.
provenance (google.cloud.documentai_v1.types.Document.Provenance):
The history of this page.
"""
Expand Down Expand Up @@ -374,7 +384,7 @@ class Layout(proto.Message):
[Layout][google.cloud.documentai.v1.Document.Page.Layout]
within context of the object this layout is for. e.g.
confidence can be for a single token, a table, a visual
element, etc. depending on context. Range [0, 1].
element, etc. depending on context. Range ``[0, 1]``.
bounding_poly (google.cloud.documentai_v1.types.BoundingPoly):
The bounding polygon for the
[Layout][google.cloud.documentai.v1.Document.Page.Layout].
Expand Down Expand Up @@ -520,7 +530,7 @@ class Token(proto.Message):
A list of detected languages together with
confidence.
provenance (google.cloud.documentai_v1.types.Document.Provenance):
The history of this annotation.
The history of this annotation.
"""

class DetectedBreak(proto.Message):
Expand Down Expand Up @@ -636,6 +646,8 @@ class Table(proto.Message):
detected_languages (Sequence[google.cloud.documentai_v1.types.Document.Page.DetectedLanguage]):
A list of detected languages together with
confidence.
provenance (google.cloud.documentai_v1.types.Document.Provenance):
The history of this table.
"""

class TableRow(proto.Message):
Expand Down Expand Up @@ -708,6 +720,11 @@ class TableCell(proto.Message):
number=4,
message="Document.Page.DetectedLanguage",
)
provenance = proto.Field(
proto.MESSAGE,
number=5,
message="Document.Provenance",
)

class FormField(proto.Message):
r"""A form field detected on the page.
Expand Down Expand Up @@ -818,11 +835,11 @@ class DetectedLanguage(proto.Message):
Attributes:
language_code (str):
The BCP-47 language code, such as "en-US" or "sr-Latn". For
more information, see
The BCP-47 language code, such as ``en-US`` or ``sr-Latn``.
For more information, see
https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
confidence (float):
Confidence of detected language. Range [0, 1].
Confidence of detected language. Range ``[0, 1]``.
"""

language_code = proto.Field(
Expand All @@ -834,6 +851,56 @@ class DetectedLanguage(proto.Message):
number=2,
)

class ImageQualityScores(proto.Message):
r"""Image Quality Scores for the page image
Attributes:
quality_score (float):
The overall quality score. Range ``[0, 1]`` where 1 is
perfect quality.
detected_defects (Sequence[google.cloud.documentai_v1.types.Document.Page.ImageQualityScores.DetectedDefect]):
A list of detected defects.
"""

class DetectedDefect(proto.Message):
r"""Image Quality Defects
Attributes:
type_ (str):
Name of the defect type. Supported values are:
- ``quality/defect_blurry``
- ``quality/defect_noisy``
- ``quality/defect_dark``
- ``quality/defect_faint``
- ``quality/defect_text_too_small``
- ``quality/defect_document_cutoff``
- ``quality/defect_text_cutoff``
- ``quality/defect_glare``
confidence (float):
Confidence of detected defect. Range ``[0, 1]`` where 1
indicates strong confidence that the defect exists.
"""

type_ = proto.Field(
proto.STRING,
number=1,
)
confidence = proto.Field(
proto.FLOAT,
number=2,
)

quality_score = proto.Field(
proto.FLOAT,
number=1,
)
detected_defects = proto.RepeatedField(
proto.MESSAGE,
number=2,
message="Document.Page.ImageQualityScores.DetectedDefect",
)

page_number = proto.Field(
proto.INT32,
number=1,
Expand Down Expand Up @@ -908,6 +975,11 @@ class DetectedLanguage(proto.Message):
number=15,
message="Document.Page.DetectedBarcode",
)
image_quality_scores = proto.Field(
proto.MESSAGE,
number=17,
message="Document.Page.ImageQualityScores",
)
provenance = proto.Field(
proto.MESSAGE,
number=16,
Expand All @@ -927,14 +999,13 @@ class Entity(proto.Message):
type_ (str):
Required. Entity type from a schema e.g. ``Address``.
mention_text (str):
Optional. Text value in the document e.g.
``1600 Amphitheatre Pkwy``. If the entity is not present in
the document, this field will be empty.
Optional. Text value of the entity e.g.
``1600 Amphitheatre Pkwy``.
mention_id (str):
Optional. Deprecated. Use ``id`` field instead.
confidence (float):
Optional. Confidence of detected Schema entity. Range [0,
1].
Optional. Confidence of detected Schema entity. Range
``[0, 1]``.
page_anchor (google.cloud.documentai_v1.types.Document.PageAnchor):
Optional. Represents the provenance of this
entity wrt. the location on the page where it
Expand Down Expand Up @@ -1230,7 +1301,7 @@ class PageRef(proto.Message):
a layout element on the page.
confidence (float):
Optional. Confidence of detected page element, if
applicable. Range [0, 1].
applicable. Range ``[0, 1]``.
"""

class LayoutType(proto.Enum):
Expand Down
16 changes: 14 additions & 2 deletions google/cloud/documentai_v1/types/document_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#
from google.protobuf import field_mask_pb2 # type: ignore
import proto # type: ignore

__protobuf__ = proto.module(
Expand All @@ -36,7 +37,8 @@ class RawDocument(proto.Message):
Inline document content.
mime_type (str):
An IANA MIME type (RFC6838) indicating the nature and format
of the [content].
of the
[content][google.cloud.documentai.v1.RawDocument.content].
"""

content = proto.Field(
Expand Down Expand Up @@ -113,7 +115,7 @@ class BatchDocumentsInputConfig(proto.Message):
Attributes:
gcs_prefix (google.cloud.documentai_v1.types.GcsPrefix):
The set of documents that match the specified Cloud Storage
[gcs_prefix].
``gcs_prefix``.
This field is a member of `oneof`_ ``source``.
gcs_documents (google.cloud.documentai_v1.types.GcsDocuments):
Expand Down Expand Up @@ -159,12 +161,22 @@ class GcsOutputConfig(proto.Message):
gcs_uri (str):
The Cloud Storage URI (a directory) of the
output.
field_mask (google.protobuf.field_mask_pb2.FieldMask):
Specifies which fields to include in the output documents.
Only top-level document fields and pages fields are supported,
so each path must be in the form of ``{document_field_name}`` or
``pages.{page_field_name}``.
"""

gcs_uri = proto.Field(
proto.STRING,
number=1,
)
field_mask = proto.Field(
proto.MESSAGE,
number=2,
message=field_mask_pb2.FieldMask,
)

gcs_output_config = proto.Field(
proto.MESSAGE,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -106,8 +106,10 @@ class ProcessRequest(proto.Message):
Whether the Human Review feature should be
skipped for this request. Defaults to false.
field_mask (google.protobuf.field_mask_pb2.FieldMask):
Specifies which fields to include in
ProcessResponse's document.
Specifies which fields to include in ProcessResponse's
document. Only top-level document fields and pages fields are
supported, so each path must be in the form of
``{document_field_name}`` or ``pages.{page_field_name}``.
"""

inline_document = proto.Field(
Expand Down
8 changes: 4 additions & 4 deletions google/cloud/documentai_v1/types/document_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,16 +63,16 @@ class EntityType(proto.Message):
and cannot be a 'Common Type'. Besides that we use the
following naming conventions:
- *use snake_casing*
- *use ``snake_casing``*
- name matching is case-insensitive
- Maximum 64 characters.
- Must start with a letter.
- Allowed characters: ASCII letters ``[a-z0-9_-]``. (For
backward compatibility, internal infrastructure and
tooling can handle any ASCII character.)
- The '/' is sometimes used to denote a property of a type.
For example line_item/amount. This convention is
deprecated, but will still be honored for backward
- The ``/`` is sometimes used to denote a property of a
type. For example ``line_item/amount``. This convention
is deprecated, but will still be honored for backward
compatibility.
base_types (Sequence[str]):
The entity type that this type is derived
Expand Down
Loading

0 comments on commit 1038a05

Please sign in to comment.