Skip to content
This repository has been archived by the owner on Jul 13, 2023. It is now read-only.

Commit

Permalink
feat: added font_family to document.proto (#377)
Browse files Browse the repository at this point in the history
- [ ] Regenerate this pull request now.

feat: added ImageQualityScores message to document.proto
feat: added PropertyMetadata and EntityTypeMetadata to document_schema.proto

PiperOrigin-RevId: 486975621

Source-Link: https://togithub.com/googleapis/googleapis/commit/398c9f9fccf4dea8d12e0b5a90ed671dec2ee387

Source-Link: https://togithub.com/googleapis/googleapis-gen/commit/7cd1f5f4e435777cb824af268dc8d37134613e6a
Copy-Tag: eyJwIjoiLmdpdGh1Yi8uT3dsQm90LnlhbWwiLCJoIjoiN2NkMWY1ZjRlNDM1Nzc3Y2I4MjRhZjI2OGRjOGQzNzEzNDYxM2U2YSJ9
  • Loading branch information
gcf-owl-bot[bot] committed Nov 11, 2022
1 parent 6c4471d commit 7ded563
Show file tree
Hide file tree
Showing 13 changed files with 1,258 additions and 284 deletions.
56 changes: 29 additions & 27 deletions protos/google/cloud/documentai/v1/barcode.proto
Original file line number Diff line number Diff line change
Expand Up @@ -28,40 +28,42 @@ option ruby_package = "Google::Cloud::DocumentAI::V1";
message Barcode {
// Format of a barcode.
// The supported formats are:
// CODE_128: Code 128 type.
// CODE_39: Code 39 type.
// CODE_93: Code 93 type.
// CODABAR: Codabar type.
// DATA_MATRIX: 2D Data Matrix type.
// ITF: ITF type.
// EAN_13: EAN-13 type.
// EAN_8: EAN-8 type.
// QR_CODE: 2D QR code type.
// UPC_A: UPC-A type.
// UPC_E: UPC-E type.
// PDF417: PDF417 type.
// AZTEC: 2D Aztec code type.
// DATABAR: GS1 DataBar code type.
//
// - `CODE_128`: Code 128 type.
// - `CODE_39`: Code 39 type.
// - `CODE_93`: Code 93 type.
// - `CODABAR`: Codabar type.
// - `DATA_MATRIX`: 2D Data Matrix type.
// - `ITF`: ITF type.
// - `EAN_13`: EAN-13 type.
// - `EAN_8`: EAN-8 type.
// - `QR_CODE`: 2D QR code type.
// - `UPC_A`: UPC-A type.
// - `UPC_E`: UPC-E type.
// - `PDF417`: PDF417 type.
// - `AZTEC`: 2D Aztec code type.
// - `DATABAR`: GS1 DataBar code type.
string format = 1;

// Value format describes the format of the value that a barcode
// encodes.
// The supported formats are:
// CONTACT_INFO: Contact information.
// EMAIL: Email address.
// ISBN: ISBN identifier.
// PHONE: Phone number.
// PRODUCT: Product.
// SMS: SMS message.
// TEXT: Text string.
// URL: URL address.
// WIFI: Wifi information.
// GEO: Geo-localization.
// CALENDAR_EVENT: Calendar event.
// DRIVER_LICENSE: Driver's license.
//
// - `CONTACT_INFO`: Contact information.
// - `EMAIL`: Email address.
// - `ISBN`: ISBN identifier.
// - `PHONE`: Phone number.
// - `PRODUCT`: Product.
// - `SMS`: SMS message.
// - `TEXT`: Text string.
// - `URL`: URL address.
// - `WIFI`: Wifi information.
// - `GEO`: Geo-localization.
// - `CALENDAR_EVENT`: Calendar event.
// - `DRIVER_LICENSE`: Driver's license.
string value_format = 2;

// Raw value encoded in the barcode.
// For example, 'MEBKM:TITLE:Google;URL:https://www.google.com;;'.
// For example: `'MEBKM:TITLE:Google;URL:https://www.google.com;;'`.
string raw_value = 3;
}
72 changes: 55 additions & 17 deletions protos/google/cloud/documentai/v1/document.proto
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ message Document {

// Font size.
FontSize font_size = 7;

// Font family such as `Arial`, `Times New Roman`.
// https://www.w3schools.com/cssref/pr_font_font-family.asp
string font_family = 8;
}

// A page in a [Document][google.cloud.documentai.v1.Document].
Expand Down Expand Up @@ -169,7 +173,7 @@ message Document {

// Confidence of the current [Layout][google.cloud.documentai.v1.Document.Page.Layout] within context of the object this
// layout is for. e.g. confidence can be for a single token, a table,
// a visual element, etc. depending on context. Range [0, 1].
// a visual element, etc. depending on context. Range `[0, 1]`.
float confidence = 2;

// The bounding polygon for the [Layout][google.cloud.documentai.v1.Document.Page.Layout].
Expand All @@ -189,7 +193,7 @@ message Document {
repeated DetectedLanguage detected_languages = 2;

// The history of this annotation.
Provenance provenance = 3;
Provenance provenance = 3 [deprecated = true];
}

// A collection of lines that a human would perceive as a paragraph.
Expand All @@ -201,7 +205,7 @@ message Document {
repeated DetectedLanguage detected_languages = 2;

// The history of this annotation.
Provenance provenance = 3;
Provenance provenance = 3 [deprecated = true];
}

// A collection of tokens that a human would perceive as a line.
Expand All @@ -214,7 +218,7 @@ message Document {
repeated DetectedLanguage detected_languages = 2;

// The history of this annotation.
Provenance provenance = 3;
Provenance provenance = 3 [deprecated = true];
}

// A detected token.
Expand Down Expand Up @@ -249,8 +253,8 @@ message Document {
// A list of detected languages together with confidence.
repeated DetectedLanguage detected_languages = 3;

// The history of this annotation.
Provenance provenance = 4;
// The history of this annotation.
Provenance provenance = 4 [deprecated = true];
}

// A detected symbol.
Expand Down Expand Up @@ -309,6 +313,9 @@ message Document {

// A list of detected languages together with confidence.
repeated DetectedLanguage detected_languages = 4;

// The history of this table.
Provenance provenance = 5;
}

// A form field detected on the page.
Expand Down Expand Up @@ -358,15 +365,43 @@ message Document {

// Detected language for a structural component.
message DetectedLanguage {
// The BCP-47 language code, such as "en-US" or "sr-Latn". For more
// The BCP-47 language code, such as `en-US` or `sr-Latn`. For more
// information, see
// https://www.unicode.org/reports/tr35/#Unicode_locale_identifier.
string language_code = 1;

// Confidence of detected language. Range [0, 1].
// Confidence of detected language. Range `[0, 1]`.
float confidence = 2;
}

// Image quality scores for the page image.
message ImageQualityScores {
// A detected image quality defect.
message DetectedDefect {
// Name of the defect type. Supported values are:
//
// - `quality/defect_blurry`
// - `quality/defect_noisy`
// - `quality/defect_dark`
// - `quality/defect_faint`
// - `quality/defect_text_too_small`
// - `quality/defect_document_cutoff`
// - `quality/defect_text_cutoff`
// - `quality/defect_glare`
string type = 1;

// Confidence of detected defect. Range `[0, 1]` where 1 indicates
// strong confidence that the defect exists.
float confidence = 2;
}

// The overall quality score. Range `[0, 1]` where 1 is perfect quality.
float quality_score = 1;

// A list of detected defects.
repeated DetectedDefect detected_defects = 2;
}

// 1-based index for current [Page][google.cloud.documentai.v1.Document.Page] in a parent [Document][google.cloud.documentai.v1.Document].
// Useful when a page is taken out of a [Document][google.cloud.documentai.v1.Document] for individual
// processing.
Expand Down Expand Up @@ -422,8 +457,11 @@ message Document {
// A list of detected barcodes.
repeated DetectedBarcode detected_barcodes = 15;

// Image Quality Scores.
ImageQualityScores image_quality_scores = 17;

// The history of this page.
Provenance provenance = 16;
Provenance provenance = 16 [deprecated = true];
}

// An entity that could be a phrase in the text or a property that belongs to
Expand Down Expand Up @@ -471,6 +509,7 @@ message Document {
// or int normalized text by default.
//
// Below are sample formats mapped to structured values.
//
// - Money/Currency type (`money_value`) is in the ISO 4217 text format.
// - Date type (`date_value`) is in the ISO 8601 text format.
// - Datetime type (`datetime_value`) is in the ISO 8601 text format.
Expand All @@ -484,14 +523,13 @@ message Document {
// Required. Entity type from a schema e.g. `Address`.
string type = 2 [(google.api.field_behavior) = REQUIRED];

// Optional. Text value in the document e.g. `1600 Amphitheatre Pkwy`. If the entity
// is not present in the document, this field will be empty.
// Optional. Text value of the entity e.g. `1600 Amphitheatre Pkwy`.
string mention_text = 3 [(google.api.field_behavior) = OPTIONAL];

// Optional. Deprecated. Use `id` field instead.
string mention_id = 4 [(google.api.field_behavior) = OPTIONAL];

// Optional. Confidence of detected Schema entity. Range [0, 1].
// Optional. Confidence of detected Schema entity. Range `[0, 1]`.
float confidence = 5 [(google.api.field_behavior) = OPTIONAL];

// Optional. Represents the provenance of this entity wrt. the location on the
Expand Down Expand Up @@ -605,7 +643,7 @@ message Document {
// Optional. Identifies the bounding polygon of a layout element on the page.
BoundingPoly bounding_poly = 4 [(google.api.field_behavior) = OPTIONAL];

// Optional. Confidence of detected page element, if applicable. Range [0, 1].
// Optional. Confidence of detected page element, if applicable. Range `[0, 1]`.
float confidence = 5 [(google.api.field_behavior) = OPTIONAL];
}

Expand Down Expand Up @@ -726,7 +764,7 @@ message Document {
string changed_text = 2;

// The history of this annotation.
repeated Provenance provenance = 3;
repeated Provenance provenance = 3 [deprecated = true];
}

// Original source document from the user.
Expand Down Expand Up @@ -765,9 +803,9 @@ message Document {
// Placeholder. Relationship among [Document.entities][google.cloud.documentai.v1.Document.entities].
repeated EntityRelation entity_relations = 8;

// Placeholder. A list of text corrections made to [Document.text]. This is
// usually used for annotating corrections to OCR mistakes. Text changes for
// a given revision may not overlap with each other.
// Placeholder. A list of text corrections made to [Document.text][google.cloud.documentai.v1.Document.text]. This
// is usually used for annotating corrections to OCR mistakes. Text changes
// for a given revision may not overlap with each other.
repeated TextChange text_changes = 14;

// Information about the sharding if this document is sharded part of a larger
Expand Down
11 changes: 9 additions & 2 deletions protos/google/cloud/documentai/v1/document_io.proto
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@ syntax = "proto3";

package google.cloud.documentai.v1;

import "google/protobuf/field_mask.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1";
option go_package = "google.golang.org/genproto/googleapis/cloud/documentai/v1;documentai";
option java_multiple_files = true;
Expand All @@ -30,7 +32,7 @@ message RawDocument {
bytes content = 1;

// An IANA MIME type (RFC6838) indicating the nature and format of the
// [content].
// [content][google.cloud.documentai.v1.RawDocument.content].
string mime_type = 2;
}

Expand Down Expand Up @@ -59,7 +61,7 @@ message GcsPrefix {
message BatchDocumentsInputConfig {
// The source.
oneof source {
// The set of documents that match the specified Cloud Storage [gcs_prefix].
// The set of documents that match the specified Cloud Storage `gcs_prefix`.
GcsPrefix gcs_prefix = 1;

// The set of documents individually specified on Cloud Storage.
Expand All @@ -74,6 +76,11 @@ message DocumentOutputConfig {
message GcsOutputConfig {
// The Cloud Storage URI (a directory) of the output.
string gcs_uri = 1;

// Specifies which fields to include in the output documents.
// Only top-level document fields and pages fields are supported, so each
// path must be in the form of `{document_field_name}` or
// `pages.{page_field_name}`.
google.protobuf.FieldMask field_mask = 2;
}

// The destination of the results.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -279,6 +279,8 @@ message ProcessRequest {
bool skip_human_review = 3;

// Specifies which fields to include in ProcessResponse's document.
// Only top-level document fields and pages fields are supported, so it must
// be in the form of `{document_field_name}` or `pages.{page_field_name}`.
google.protobuf.FieldMask field_mask = 6;
}

Expand Down
7 changes: 4 additions & 3 deletions protos/google/cloud/documentai/v1/document_schema.proto
Original file line number Diff line number Diff line change
Expand Up @@ -86,15 +86,16 @@ message DocumentSchema {
// Name of the type. It must be unique within the schema file and
// cannot be a 'Common Type'. Besides that we use the following naming
// conventions:
// - *use snake_casing*
//
// - *use `snake_casing`*
// - name matching is case-insensitive
// - Maximum 64 characters.
// - Must start with a letter.
// - Allowed characters: ASCII letters `[a-z0-9_-]`. (For backward
// compatibility, internal infrastructure and tooling can handle any ASCII
// character.)
// - The '/' is sometimes used to denote a property of a type. For example
// line_item/amount. This convention is deprecated, but will still be
// - The `/` is sometimes used to denote a property of a type. For example
// `line_item/amount`. This convention is deprecated, but will still be
// honored for backward compatibility.
string name = 1;

Expand Down
14 changes: 9 additions & 5 deletions protos/google/cloud/documentai/v1/processor.proto
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ package google.cloud.documentai.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1/document_schema.proto";
import "google/protobuf/timestamp.proto";

option csharp_namespace = "Google.Cloud.DocumentAI.V1";
Expand Down Expand Up @@ -85,6 +86,9 @@ message ProcessorVersion {
// The display name of the processor version.
string display_name = 2;

// The schema of the processor version. Describes the output.
DocumentSchema document_schema = 12;

// The state of the processor version.
State state = 6;

Expand Down Expand Up @@ -125,14 +129,14 @@ message Processor {
// The processor is disabled.
DISABLED = 2;

// The processor is being enabled, will become ENABLED if successful.
// The processor is being enabled, will become `ENABLED` if successful.
ENABLING = 3;

// The processor is being disabled, will become DISABLED if successful.
// The processor is being disabled, will become `DISABLED` if successful.
DISABLING = 4;

// The processor is being created, will become either ENABLED (for
// successful creation) or FAILED (for failed ones).
// The processor is being created, will become either `ENABLED` (for
// successful creation) or `FAILED` (for failed ones).
// Once a processor is in this state, it can then be used for document
// processing, but the feature dependencies of the processor might not be
// fully created yet.
Expand All @@ -154,7 +158,7 @@ message Processor {
(google.api.field_behavior) = OUTPUT_ONLY
];

// The processor type, e.g., OCR_PROCESSOR, INVOICE_PROCESSOR, etc.
// The processor type, e.g., `OCR_PROCESSOR`, `INVOICE_PROCESSOR`, etc.
// To get a list of processors types, see
// [FetchProcessorTypes][google.cloud.documentai.v1.DocumentProcessorService.FetchProcessorTypes].
string type = 2;
Expand Down
4 changes: 2 additions & 2 deletions protos/google/cloud/documentai/v1/processor_type.proto
Original file line number Diff line number Diff line change
Expand Up @@ -42,10 +42,10 @@ message ProcessorType {
}

// The resource name of the processor type.
// Format: projects/{project}/processorTypes/{processor_type}
// Format: `projects/{project}/processorTypes/{processor_type}`
string name = 1;

// The type of the processor, e.g., "invoice_parsing".
// The processor type, e.g., `OCR_PROCESSOR`, `INVOICE_PROCESSOR`, etc.
string type = 2;

// The processor category, used by UI to group processor types.
Expand Down
Loading

0 comments on commit 7ded563

Please sign in to comment.