Skip to content

Commit

Permalink
feat: Added Training and Evaluation functions, request, responses and…
Browse files Browse the repository at this point in the history
… metadata to document_processor_service.proto

feat: Added evaluation.proto
feat: Added latest_evaluation to processor.proto
chore: removed deprecated flag from REPLACE in OperationType in document.proto

PiperOrigin-RevId: 511230520
  • Loading branch information
Google APIs authored and Copybara-Service committed Feb 21, 2023
1 parent 84bbbc5 commit c53bf8d
Show file tree
Hide file tree
Showing 7 changed files with 426 additions and 17 deletions.
1 change: 1 addition & 0 deletions google/cloud/documentai/v1/BUILD.bazel
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ proto_library(
"document_io.proto",
"document_processor_service.proto",
"document_schema.proto",
"evaluation.proto",
"geometry.proto",
"operation_metadata.proto",
"processor.proto",
Expand Down
31 changes: 20 additions & 11 deletions google/cloud/documentai/v1/document.proto
Original file line number Diff line number Diff line change
Expand Up @@ -735,22 +735,29 @@ message Document {
// Remove an element identified by `parent`.
REMOVE = 2;

// Replace an element identified by `parent`.
// Updates any fields within the given provenance scope of the message. It
// 'overwrites' the fields rather than replacing them. This is
// especially relevant when we just want to update a field value of an
// entity without also affecting all the child properties.
UPDATE = 7;

// Currently unused. Replace an element identified by `parent`.
REPLACE = 3;

// Request human review for the element identified by `parent`.
EVAL_REQUESTED = 4;
// Deprecated. Request human review for the element identified by
// `parent`.
EVAL_REQUESTED = 4 [deprecated = true];

// Element is reviewed and approved at human review, confidence will be
// set to 1.0.
EVAL_APPROVED = 5;
// Deprecated. Element is reviewed and approved at human review,
// confidence will be set to 1.0.
EVAL_APPROVED = 5 [deprecated = true];

// Element is skipped in the validation process.
EVAL_SKIPPED = 6;
// Deprecated. Element is skipped in the validation process.
EVAL_SKIPPED = 6 [deprecated = true];
}

// The index of the revision that produced this element.
int32 revision = 1;
int32 revision = 1 [deprecated = true];

// The Id of this operation. Needs to be unique within the scope of the
// revision.
Expand Down Expand Up @@ -786,7 +793,8 @@ message Document {
string processor = 5;
}

// Id of the revision. Unique within the context of the document.
// Id of the revision, internally generated by doc proto storage.
// Unique within the context of the document.
string id = 1;

// The revisions that this revision is based on. This can include one or
Expand All @@ -799,7 +807,8 @@ message Document {
// `provenance.parent.revision` fields that index into this field.
repeated string parent_ids = 7;

// The time that the revision was created.
// The time that the revision was created, internally generated by
// doc proto storage at the time of create.
google.protobuf.Timestamp create_time = 3;

// Human Review information of this revision.
Expand Down
202 changes: 202 additions & 0 deletions google/cloud/documentai/v1/document_processor_service.proto
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ import "google/api/resource.proto";
import "google/cloud/documentai/v1/document.proto";
import "google/cloud/documentai/v1/document_io.proto";
import "google/cloud/documentai/v1/document_schema.proto";
import "google/cloud/documentai/v1/evaluation.proto";
import "google/cloud/documentai/v1/operation_metadata.proto";
import "google/cloud/documentai/v1/processor.proto";
import "google/cloud/documentai/v1/processor_type.proto";
import "google/longrunning/operations.proto";
import "google/protobuf/empty.proto";
import "google/protobuf/field_mask.proto";
import "google/protobuf/timestamp.proto";
import "google/rpc/status.proto";
Expand Down Expand Up @@ -131,6 +133,22 @@ service DocumentProcessorService {
option (google.api.method_signature) = "name";
}

// Trains a new processor version under the processor given by `parent`.
// Returns a long-running operation whose response type is
// TrainProcessorVersionResponse and whose metadata type is
// TrainProcessorVersionMetadata.
rpc TrainProcessorVersion(TrainProcessorVersionRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1/{parent=projects/*/locations/*/processors/*}/processorVersions:train"
body: "*"
};
option (google.api.method_signature) = "parent,processor_version";
option (google.longrunning.operation_info) = {
response_type: "TrainProcessorVersionResponse"
metadata_type: "TrainProcessorVersionMetadata"
};
}

// Gets a processor version detail.
rpc GetProcessorVersion(GetProcessorVersionRequest)
returns (ProcessorVersion) {
Expand Down Expand Up @@ -272,6 +290,38 @@ service DocumentProcessorService {
metadata_type: "ReviewDocumentOperationMetadata"
};
}

// Evaluates a ProcessorVersion against annotated documents, producing an
// Evaluation.
// Returns a long-running operation whose response type is
// EvaluateProcessorVersionResponse and whose metadata type is
// EvaluateProcessorVersionMetadata.
rpc EvaluateProcessorVersion(EvaluateProcessorVersionRequest)
returns (google.longrunning.Operation) {
option (google.api.http) = {
post: "/v1/{processor_version=projects/*/locations/*/processors/*/processorVersions/*}:evaluateProcessorVersion"
body: "*"
};
option (google.api.method_signature) = "processor_version";
option (google.longrunning.operation_info) = {
response_type: "EvaluateProcessorVersionResponse"
metadata_type: "EvaluateProcessorVersionMetadata"
};
}

// Retrieves a specific evaluation, identified by its resource name
// (`name`), which is nested under a processor version.
rpc GetEvaluation(GetEvaluationRequest) returns (Evaluation) {
option (google.api.http) = {
get: "/v1/{name=projects/*/locations/*/processors/*/processorVersions/*/evaluations/*}"
};
option (google.api.method_signature) = "name";
}

// Retrieves a set of evaluations for a given processor version. The
// processor version is identified by `parent`.
rpc ListEvaluations(ListEvaluationsRequest)
returns (ListEvaluationsResponse) {
option (google.api.http) = {
get: "/v1/{parent=projects/*/locations/*/processors/*/processorVersions/*}/evaluations"
};
option (google.api.method_signature) = "parent";
}
}

// Request message for the process document method.
Expand Down Expand Up @@ -758,6 +808,81 @@ message SetDefaultProcessorVersionMetadata {
CommonOperationMetadata common_metadata = 1;
}

// Request message for the TrainProcessorVersion method.
message TrainProcessorVersionRequest {
// The input data used to train a new `ProcessorVersion`.
message InputData {
// The documents used for training the new version.
BatchDocumentsInputConfig training_documents = 3;

// The documents used for testing the trained version.
BatchDocumentsInputConfig test_documents = 4;
}

// Required. The parent (project, location and processor) to create the new
// version for. Format:
// `projects/{project}/locations/{location}/processors/{processor}`.
string parent = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "documentai.googleapis.com/Processor"
}
];

// Required. The processor version to be created.
ProcessorVersion processor_version = 2
[(google.api.field_behavior) = REQUIRED];

// Optional. The schema the processor version will be trained with.
DocumentSchema document_schema = 10 [(google.api.field_behavior) = OPTIONAL];

// Optional. The input data (training and test documents) used to train the
// `ProcessorVersion`.
InputData input_data = 4 [(google.api.field_behavior) = OPTIONAL];

// Optional. The processor version to use as a base for training. This
// processor version must be a child of `parent`. Format:
// `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`.
string base_processor_version = 8 [(google.api.field_behavior) = OPTIONAL];
}

// The response for the TrainProcessorVersion method. Declared as the
// response type of that method's long-running operation.
message TrainProcessorVersionResponse {
// The resource name of the processor version produced by training.
string processor_version = 1;
}

// The metadata of the TrainProcessorVersion long-running operation. It
// represents a processor version being created by training.
message TrainProcessorVersionMetadata {
// The dataset validation information.
// This includes any and all errors with documents and the dataset.
message DatasetValidation {
// The total number of document errors.
int32 document_error_count = 3;

// The total number of dataset errors.
int32 dataset_error_count = 4;

// Error information pertaining to specific documents. A maximum of 10
// document errors will be returned.
// Any document with errors will not be used throughout training.
repeated google.rpc.Status document_errors = 1;

// Error information for the dataset as a whole. A maximum of 10 dataset
// errors will be returned.
// A single dataset error is terminal for training.
repeated google.rpc.Status dataset_errors = 2;
}

// The basic metadata of the long running operation.
CommonOperationMetadata common_metadata = 1;

// The training dataset validation information.
DatasetValidation training_dataset_validation = 2;

// The test dataset validation information.
DatasetValidation test_dataset_validation = 3;
}

// Request message for review document method.
message ReviewDocumentRequest {
// The priority level of the human review task.
Expand Down Expand Up @@ -828,3 +953,80 @@ message ReviewDocumentOperationMetadata {
// The Crowd Compute question ID.
string question_id = 6;
}

// Request message for the EvaluateProcessorVersion method.
// Evaluates the given ProcessorVersion against the supplied documents.
message EvaluateProcessorVersionRequest {
// Required. The resource name of the
// [ProcessorVersion][google.cloud.documentai.v1.ProcessorVersion] to
// evaluate. Format:
// `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
string processor_version = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "documentai.googleapis.com/ProcessorVersion"
}
];

// Optional. The documents used in the evaluation. If unspecified, use the
// processor's dataset as evaluation input.
BatchDocumentsInputConfig evaluation_documents = 3
[(google.api.field_behavior) = OPTIONAL];
}

// Metadata of the EvaluateProcessorVersion method. Declared as the metadata
// type of that method's long-running operation.
message EvaluateProcessorVersionMetadata {
// The basic metadata of the long running operation.
CommonOperationMetadata common_metadata = 1;
}

// Response of the EvaluateProcessorVersion method. Declared as the response
// type of that method's long-running operation.
message EvaluateProcessorVersionResponse {
// The resource name of the created evaluation.
string evaluation = 2;
}

// Request message for the GetEvaluation method.
// Retrieves a specific Evaluation.
message GetEvaluationRequest {
// Required. The resource name of the
// [Evaluation][google.cloud.documentai.v1.Evaluation] to get. Format:
// `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}/evaluations/{evaluation}`
string name = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "documentai.googleapis.com/Evaluation"
}
];
}

// Request message for the ListEvaluations method.
// Retrieves a list of evaluations for a given ProcessorVersion.
message ListEvaluationsRequest {
// Required. The resource name of the
// [ProcessorVersion][google.cloud.documentai.v1.ProcessorVersion] to list
// evaluations for. Format:
// `projects/{project}/locations/{location}/processors/{processor}/processorVersions/{processorVersion}`
string parent = 1 [
(google.api.field_behavior) = REQUIRED,
(google.api.resource_reference) = {
type: "documentai.googleapis.com/ProcessorVersion"
}
];

// The standard list page size.
// If unspecified, at most 5 evaluations will be returned.
// The maximum value is 100; values above 100 will be coerced to 100.
int32 page_size = 2;

// A page token, received from a previous `ListEvaluations` call.
// Provide this to retrieve the subsequent page.
string page_token = 3;
}

// The response from the ListEvaluations method.
message ListEvaluationsResponse {
// The evaluations requested.
repeated Evaluation evaluations = 1;

// A token, which can be sent as `page_token` to retrieve the next page.
// If this field is omitted, there are no subsequent pages.
string next_page_token = 2;
}
20 changes: 14 additions & 6 deletions google/cloud/documentai/v1/document_schema.proto
Original file line number Diff line number Diff line change
Expand Up @@ -38,20 +38,28 @@ message DocumentSchema {

// Defines properties that can be part of the entity type.
message Property {
// Types of occurrences of the entity type in the document. Note: this
// represents the number of instances of an entity types, not number of
// mentions of a given entity instance.
// Types of occurrences of the entity type in the document. This
// represents the number of instances of an entity, not
// number of mentions of an entity. For example, a bank statement may
// only have one `account_number`, but this account number may be
// mentioned in several places on the document. In this case the
// `account_number` would be considered a `REQUIRED_ONCE` entity type. If,
// on the other hand, we expect a bank statement to contain the status of
// multiple different accounts for the customers, the occurrence type will
// be set to `REQUIRED_MULTIPLE`.
enum OccurrenceType {
// Unspecified occurrence type.
OCCURRENCE_TYPE_UNSPECIFIED = 0;

// There will be zero or one instance of this entity type.
// There will be zero or one instance of this entity type. The same
// entity instance may be mentioned multiple times.
OPTIONAL_ONCE = 1;

// The entity type will appear zero or multiple times.
OPTIONAL_MULTIPLE = 2;

// The entity type will only appear exactly once.
// The entity type will only appear exactly once. The same
// entity instance may be mentioned multiple times.
REQUIRED_ONCE = 3;

// The entity type will appear once or more times.
Expand Down Expand Up @@ -103,7 +111,7 @@ message DocumentSchema {
// one should be set.
repeated string base_types = 2;

// Describing the nested structure, or composition of an entity.
// Description of the nested structure, or composition of an entity.
repeated Property properties = 6;
}

Expand Down
4 changes: 4 additions & 0 deletions google/cloud/documentai/v1/documentai_v1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,14 @@ types:
- name: google.cloud.documentai.v1.DisableProcessorResponse
- name: google.cloud.documentai.v1.EnableProcessorMetadata
- name: google.cloud.documentai.v1.EnableProcessorResponse
- name: google.cloud.documentai.v1.EvaluateProcessorVersionMetadata
- name: google.cloud.documentai.v1.EvaluateProcessorVersionResponse
- name: google.cloud.documentai.v1.ReviewDocumentOperationMetadata
- name: google.cloud.documentai.v1.ReviewDocumentResponse
- name: google.cloud.documentai.v1.SetDefaultProcessorVersionMetadata
- name: google.cloud.documentai.v1.SetDefaultProcessorVersionResponse
- name: google.cloud.documentai.v1.TrainProcessorVersionMetadata
- name: google.cloud.documentai.v1.TrainProcessorVersionResponse
- name: google.cloud.documentai.v1.UndeployProcessorVersionMetadata
- name: google.cloud.documentai.v1.UndeployProcessorVersionResponse

Expand Down
Loading

0 comments on commit c53bf8d

Please sign in to comment.