From d5020fff4cbe108bdf506074791c56cff7840bef Mon Sep 17 00:00:00 2001 From: Google APIs Date: Tue, 2 Apr 2024 11:01:15 -0700 Subject: [PATCH] feat: Support a new Layout Processor in Document AI docs: keep the API doc up-to-date with recent changes PiperOrigin-RevId: 621233157 --- .../cloud/documentai/v1beta3/document.proto | 137 ++++++++++++++++++ .../v1beta3/document_processor_service.proto | 32 ++++ .../cloud/documentai/v1beta3/processor.proto | 8 +- 3 files changed, 173 insertions(+), 4 deletions(-) diff --git a/google/cloud/documentai/v1beta3/document.proto b/google/cloud/documentai/v1beta3/document.proto index aeb805b215bb6..d93aafa98c50d 100644 --- a/google/cloud/documentai/v1beta3/document.proto +++ b/google/cloud/documentai/v1beta3/document.proto @@ -897,6 +897,137 @@ message Document { repeated Provenance provenance = 3 [deprecated = true]; } + // Represents the parsed layout of a document as a collection of blocks that + // the document is divided into. + message DocumentLayout { + // Represents a block. A block could be one of the various types (text, + // table, list) supported. + message DocumentLayoutBlock { + // Represents where the block starts and ends in the document. + message LayoutPageSpan { + // Page where block starts in the document. + int32 page_start = 1; + + // Page where block ends in the document. + int32 page_end = 2; + } + + // Represents a text type block. + message LayoutTextBlock { + // Text content stored in the block. + string text = 1; + + // Type of the text in the block. Available options are: `paragraph`, + // `subtitle`, `heading-1`, `heading-2`, `heading-3`, `heading-4`, + // `heading-5`, `header`, `footer`. + string type = 2; + + // A text block could further have child blocks. + // Repeated blocks support further hierarchies and nested blocks. + repeated DocumentLayoutBlock blocks = 3; + } + + // Represents a table type block. + message LayoutTableBlock { + // Header rows at the top of the table. 
+ repeated LayoutTableRow header_rows = 1; + + // Body rows containing main table content. + repeated LayoutTableRow body_rows = 2; + + // Table caption/title. + string caption = 3; + } + + // Represents a row in a table. + message LayoutTableRow { + // A table row is a list of table cells. + repeated LayoutTableCell cells = 1; + } + + // Represents a cell in a table row. + message LayoutTableCell { + // A table cell is a list of blocks. + // Repeated blocks support further hierarchies and nested blocks. + repeated DocumentLayoutBlock blocks = 1; + + // How many rows this cell spans. + int32 row_span = 2; + + // How many columns this cell spans. + int32 col_span = 3; + } + + // Represents a list type block. + message LayoutListBlock { + // List entries that constitute a list block. + repeated LayoutListEntry list_entries = 1; + + // Type of the list_entries (if any). Available options are `ordered` + // and `unordered`. + string type = 2; + } + + // Represents an entry in the list. + message LayoutListEntry { + // A list entry is a list of blocks. + // Repeated blocks support further hierarchies and nested blocks. + repeated DocumentLayoutBlock blocks = 1; + } + + oneof block { + // Block consisting of text content. + LayoutTextBlock text_block = 2; + + // Block consisting of table content/structure. + LayoutTableBlock table_block = 3; + + // Block consisting of list content/structure. + LayoutListBlock list_block = 4; + } + + // ID of the block. + string block_id = 1; + + // Page span of the block. + LayoutPageSpan page_span = 5; + } + + // List of blocks in the document. + repeated DocumentLayoutBlock blocks = 1; + } + + // Represents the chunks that the document is divided into. + message ChunkedDocument { + // Represents a chunk. + message Chunk { + // Represents where the chunk starts and ends in the document. + message ChunkPageSpan { + // Page where chunk starts in the document. + int32 page_start = 1; + + // Page where chunk ends in the document. 
+ int32 page_end = 2; + } + + // ID of the chunk. + string chunk_id = 1; + + // IDs of all parsed document layout source blocks used to generate the + // chunk. + repeated string source_block_ids = 2; + + // Text content of the chunk. + string content = 3; + + // Page span of the chunk. + ChunkPageSpan page_span = 4; + } + + // List of chunks. + repeated Chunk chunks = 1; + } + // Original source document from the user. oneof source { // Optional. Currently supports Google Cloud Storage URI of the form @@ -950,6 +1081,12 @@ message Document { // Placeholder. Revision history of this document. repeated Revision revisions = 13; + + // Parsed layout of the document. + DocumentLayout document_layout = 17; + + // Document chunked based on chunking config. + ChunkedDocument chunked_document = 18; } // The revision reference specifies which revision on the document to read. diff --git a/google/cloud/documentai/v1beta3/document_processor_service.proto b/google/cloud/documentai/v1beta3/document_processor_service.proto index 9769b1042a80d..d1afd49cfefb1 100644 --- a/google/cloud/documentai/v1beta3/document_processor_service.proto +++ b/google/cloud/documentai/v1beta3/document_processor_service.proto @@ -342,6 +342,34 @@ service DocumentProcessorService { // Options for Process API message ProcessOptions { + // Serving config for layout parser processor. + message LayoutConfig { + // Serving config for chunking. + message ChunkingConfig { + // Optional. The chunk sizes to use when splitting documents, in order of + // level. + int32 chunk_size = 1 [(google.api.field_behavior) = OPTIONAL]; + + // Optional. Whether or not to include ancestor headings when splitting. + bool include_ancestor_headings = 2 + [(google.api.field_behavior) = OPTIONAL]; + + // Optional. The number of tokens to group together when evaluating + // semantic similarity. NOTE(review): field type is `bool`; confirm. + bool semantic_chunking_group_size = 3 + [(google.api.field_behavior) = OPTIONAL]; + + // Optional. 
The percentile of cosine dissimilarity that must be exceeded + // between a group of tokens and the next. The smaller this number is, the + // more chunks will be generated. + int32 breakpoint_percentile_threshold = 4 + [(google.api.field_behavior) = OPTIONAL]; + } + + // Optional. Config for chunking in layout parser processor. + ChunkingConfig chunking_config = 1 [(google.api.field_behavior) = OPTIONAL]; + } + // A list of individual page numbers. message IndividualPageSelector { // Optional. Indices of the pages (starting from 1). @@ -370,6 +398,10 @@ message ProcessOptions { // Returns error if set on other processor types. OcrConfig ocr_config = 1; + // Optional. Only applicable to `LAYOUT_PARSER_PROCESSOR`. + // Returns error if set on other processor types. + LayoutConfig layout_config = 9 [(google.api.field_behavior) = OPTIONAL]; + // Optional. Override the schema of the // [ProcessorVersion][google.cloud.documentai.v1beta3.ProcessorVersion]. Will // return an Invalid Argument error if this field is set when the underlying diff --git a/google/cloud/documentai/v1beta3/processor.proto b/google/cloud/documentai/v1beta3/processor.proto index 6a4437ac1af1e..655f1b17deac9 100644 --- a/google/cloud/documentai/v1beta3/processor.proto +++ b/google/cloud/documentai/v1beta3/processor.proto @@ -94,10 +94,10 @@ message ProcessorVersion { MODEL_TYPE_CUSTOM = 2; } - // The resource name of the processor version. + // Identifier. The resource name of the processor version. // Format: // `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processor_version}` - string name = 1; + string name = 1 [(google.api.field_behavior) = IDENTIFIER]; // The display name of the processor version. string display_name = 2; @@ -105,8 +105,8 @@ message ProcessorVersion { // The schema of the processor version. Describes the output. DocumentSchema document_schema = 12; - // The state of the processor version. - State state = 6; + // Output only. 
The state of the processor version. + State state = 6 [(google.api.field_behavior) = OUTPUT_ONLY]; // The time the processor version was created. google.protobuf.Timestamp create_time = 7;