Skip to content

Commit

Permalink
feat: add StyleInfo to document.proto
Browse files Browse the repository at this point in the history
feat: add REPLACE enum to OperationType in document.proto
feat: add PropertyMetadata and EntityTypeMetadata to document_schema.proto
feat: add IMPORTING enum to State in processor.proto
chore: updated comments

PiperOrigin-RevId: 540932243
  • Loading branch information
Google APIs authored and Copybara-Service committed Jun 16, 2023
1 parent 2d7af51 commit 120a89c
Show file tree
Hide file tree
Showing 15 changed files with 707 additions and 196 deletions.
12 changes: 10 additions & 2 deletions google/cloud/documentai/v1beta3/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,12 @@ proto_library(
name = "documentai_proto",
srcs = [
"barcode.proto",
"dataset.proto",
"document.proto",
"document_io.proto",
"document_processor_service.proto",
"document_schema.proto",
"document_service.proto",
"evaluation.proto",
"geometry.proto",
"operation_metadata.proto",
Expand Down Expand Up @@ -91,8 +93,8 @@ java_gapic_library(
rest_numeric_enums = True,
service_yaml = "documentai_v1beta3.yaml",
test_deps = [
":documentai_java_grpc",
"//google/cloud/location:location_java_grpc",
":documentai_java_grpc",
],
transport = "grpc+rest",
deps = [
Expand All @@ -107,6 +109,8 @@ java_gapic_test(
test_classes = [
"com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClientHttpJsonTest",
"com.google.cloud.documentai.v1beta3.DocumentProcessorServiceClientTest",
"com.google.cloud.documentai.v1beta3.DocumentServiceClientHttpJsonTest",
"com.google.cloud.documentai.v1beta3.DocumentServiceClientTest",
],
runtime_deps = [":documentai_java_gapic_test"],
)
Expand Down Expand Up @@ -242,10 +246,13 @@ php_gapic_library(
name = "documentai_php_gapic",
srcs = [":documentai_proto_with_info"],
grpc_service_config = "documentai_v1beta3_grpc_service_config.json",
migration_mode = "PRE_MIGRATION_SURFACE_ONLY",
rest_numeric_enums = True,
service_yaml = "documentai_v1beta3.yaml",
transport = "grpc+rest",
deps = [":documentai_php_proto"],
deps = [
":documentai_php_proto",
],
)

# Open Source Packages
Expand Down Expand Up @@ -355,6 +362,7 @@ load(

csharp_proto_library(
name = "documentai_csharp_proto",
extra_opts = [""],
deps = [":documentai_proto"],
)

Expand Down
2 changes: 1 addition & 1 deletion google/cloud/documentai/v1beta3/barcode.proto
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2022 Google LLC
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
Expand Down
138 changes: 138 additions & 0 deletions google/cloud/documentai/v1beta3/dataset.proto
Original file line number Diff line number Diff line change
@@ -0,0 +1,138 @@
// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

syntax = "proto3";

package google.cloud.documentai.v1beta3;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1beta3/document_io.proto";
import "google/cloud/documentai/v1beta3/document_schema.proto";

// Per-language code generation settings for this file.
option csharp_namespace = "Google.Cloud.DocumentAI.V1Beta3";
option go_package = "cloud.google.com/go/documentai/apiv1beta3/documentaipb;documentaipb";
option java_multiple_files = true;
option java_outer_classname = "DatasetProto";
option java_package = "com.google.cloud.documentai.v1beta3";
option php_namespace = "Google\\Cloud\\DocumentAI\\V1beta3";
option ruby_package = "Google::Cloud::DocumentAI::V1beta3";

// Declares the `contentwarehouse.googleapis.com/Schema` resource type at file
// level. No message in this file carries that type; it is declared here so
// that `Dataset.DocumentWarehouseConfig.schema` can point at it through
// `google.api.resource_reference`.
option (google.api.resource_definition) = {
type: "contentwarehouse.googleapis.com/Schema"
pattern: "projects/{project}/locations/{location}/schemas/{schema}"
};

// A singleton resource nested under a
// [Processor][google.cloud.documentai.v1beta3.Processor]. It holds the
// configuration for a collection of documents.
message Dataset {
  option (google.api.resource) = {
    type: "documentai.googleapis.com/Dataset"
    pattern: "projects/{project}/locations/{location}/processors/{processor}/dataset"
  };

  // Settings that apply only to the Cloud Storage-based implementation.
  message GCSManagedConfig {
    // Required. Cloud Storage URI (a directory) under which the documents
    // that belong to the dataset must be stored.
    GcsPrefix gcs_prefix = 1 [
      (google.api.field_behavior) = REQUIRED
    ];
  }

  // Settings that apply only to the Document AI Warehouse-based
  // implementation.
  message DocumentWarehouseConfig {
    // Output only. Document AI Warehouse collection associated with the
    // dataset.
    string collection = 1 [
      (google.api.field_behavior) = OUTPUT_ONLY
    ];

    // Output only. Document AI Warehouse schema associated with the dataset.
    string schema = 2 [
      (google.api.field_behavior) = OUTPUT_ONLY,
      (google.api.resource_reference) = {
        type: "contentwarehouse.googleapis.com/Schema"
      }
    ];
  }

  // Settings that apply only to an unmanaged dataset.
  message UnmanagedDatasetConfig {}

  // Settings that apply only to Spanner-based indexing.
  message SpannerIndexingConfig {}

  // The states a dataset can be in.
  enum State {
    // Unspecified default value; should not be used.
    STATE_UNSPECIFIED = 0;

    // The dataset has not been initialized.
    UNINITIALIZED = 1;

    // Initialization of the dataset is in progress.
    INITIALIZING = 2;

    // The dataset has been initialized.
    INITIALIZED = 3;
  }

  // Storage configuration of the dataset. The members are mutually
  // exclusive: at most one of them is set.
  oneof storage_source {
    // Optional. Configuration for a user-managed Cloud Storage dataset.
    // Choose this when the dataset documents live in a Cloud Storage
    // location that the user manages.
    GCSManagedConfig gcs_managed_config = 3 [
      (google.api.field_behavior) = OPTIONAL
    ];

    // Optional. Configuration for a Document AI Warehouse-based dataset.
    DocumentWarehouseConfig document_warehouse_config = 5 [
      (google.api.field_behavior) = OPTIONAL
    ];

    // Optional. Configuration for an unmanaged dataset. Choose this when the
    // document service manages the dataset documents internally (that is,
    // they are not user-managed).
    UnmanagedDatasetConfig unmanaged_dataset_config = 6 [
      (google.api.field_behavior) = OPTIONAL
    ];
  }

  // Indexing configuration of the dataset.
  oneof indexing_source {
    // Optional. Lightweight indexing source that offers low latency and high
    // reliability, but without advanced features such as CMEK and
    // content-based search.
    SpannerIndexingConfig spanner_indexing_config = 4 [
      (google.api.field_behavior) = OPTIONAL
    ];
  }

  // Resource name of the dataset.
  // Format:
  // `projects/{project}/locations/{location}/processors/{processor}/dataset`
  string name = 1;

  // Required. The state of the dataset. The value is ignored when the dataset
  // is updated.
  State state = 2 [
    (google.api.field_behavior) = REQUIRED
  ];
}

// The schema of a dataset.
message DatasetSchema {
  option (google.api.resource) = {
    type: "documentai.googleapis.com/DatasetSchema"
    pattern: "projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema"
  };

  // Resource name of the dataset schema.
  // Format:
  // `projects/{project}/locations/{location}/processors/{processor}/dataset/datasetSchema`
  string name = 1;

  // Optional. The document schema of the dataset.
  DocumentSchema document_schema = 3 [
    (google.api.field_behavior) = OPTIONAL
  ];
}

0 comments on commit 120a89c

Please sign in to comment.