// Copyright 2023 Google LLC
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
syntax = "proto3";

package google.cloud.contentwarehouse.v1;

import "google/api/field_behavior.proto";
import "google/api/resource.proto";
import "google/cloud/documentai/v1/document.proto";
import "google/protobuf/timestamp.proto";
import "google/type/datetime.proto";

// Per-language code generation options.
option csharp_namespace = "Google.Cloud.ContentWarehouse.V1";
option go_package = "cloud.google.com/go/contentwarehouse/apiv1/contentwarehousepb;contentwarehousepb";
option java_multiple_files = true;
option java_outer_classname = "DocumentProto";
option java_package = "com.google.cloud.contentwarehouse.v1";
option php_namespace = "Google\\Cloud\\ContentWarehouse\\V1";
option ruby_package = "Google::Cloud::ContentWarehouse::V1";
// Structure of a content warehouse document.
message Document {
  option (google.api.resource) = {
    type: "contentwarehouse.googleapis.com/Document"
    pattern: "projects/{project}/locations/{location}/documents/{document}"
    pattern: "projects/{project}/locations/{location}/documents/referenceId/{reference_id}"
  };

  // Resource name of the document, in the format:
  // projects/{project_number}/locations/{location}/documents/{document_id}.
  //
  // This name is ignored when a document is created.
  string name = 1;

  // Reference ID set by the customer. Must be unique per project and
  // location.
  string reference_id = 11;

  // Required. User-supplied display name of the document, shown in the UI.
  // Customers can populate it with the name of the document. It differs from
  // `title`, which is optional and stores the top heading in the document.
  string display_name = 2 [(google.api.field_behavior) = REQUIRED];

  // Title describing the document: either its top heading or other text that
  // describes it.
  string title = 18;

  // URI used to display the document, for example, in the UI.
  string display_uri = 17;

  // Name of the document schema, in the format:
  // projects/{project_number}/locations/{location}/documentSchemas/{document_schema_id}.
  string document_schema_name = 3 [
    (google.api.resource_reference) = {
      type: "contentwarehouse.googleapis.com/DocumentSchema"
    }
  ];

  // Structured content of the document. At most one member can be set.
  oneof structured_content {
    // Other document formats, such as PPTX or XLSX.
    string plain_text = 15;

    // Structured content, including OCR, saved in Document AI format.
    google.cloud.documentai.v1.Document cloud_ai_document = 4;
  }

  // Deprecated. A path linked to the structured content file.
  string structured_content_uri = 16 [deprecated = true];

  // Raw document file. At most one member can be set.
  oneof raw_document {
    // Cloud Storage path of the raw document file.
    string raw_document_path = 5;

    // Content of the raw document.
    bytes inline_raw_document = 6;
  }

  // User-supplied metadata, as a list of property values.
  repeated Property properties = 7;

  // Output only. Time at which the document was last updated.
  google.protobuf.Timestamp update_time = 8
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the document was created.
  google.protobuf.Timestamp create_time = 9
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Used when DocAI was not used to load the document and the
  // inline_raw_document needs parsing/extracting. For example, when
  // inline_raw_document holds the bytes of a PDF file, set this to
  // RAW_DOCUMENT_FILE_TYPE_PDF.
  RawDocumentFileType raw_document_file_type = 10;

  // Deprecated. If true, makes the document visible to asynchronous policies
  // and rules.
  bool async_enabled = 12 [deprecated = true];

  // Category (image, audio, video, etc.) of the original content.
  ContentCategory content_category = 20;

  // Deprecated. If true, text extraction will not be performed.
  bool text_extraction_disabled = 19 [deprecated = true];

  // If true, text extraction will be performed.
  bool text_extraction_enabled = 21;

  // The user who created the document.
  string creator = 13;

  // The user who last updated the document.
  string updater = 14;

  // Output only. If the document is linked to a Collection with
  // RetentionPolicy, the date when the document becomes mutable.
  google.protobuf.Timestamp disposition_time = 22
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Whether the document has a legal hold on it.
  bool legal_hold = 23 [(google.api.field_behavior) = OUTPUT_ONLY];
}
// A reference to a document.
message DocumentReference {
  // Required. Name of the document being referenced.
  string document_name = 1 [
    (google.api.field_behavior) = REQUIRED,
    (google.api.resource_reference) = {
      type: "contentwarehouse.googleapis.com/Document"
    }
  ];

  // Display name of the referenced document. Depending on the ACL
  // constraint, it does not need to be consistent with the display_name in
  // the Document proto.
  string display_name = 2;

  // A subset of the referenced document's content, which lets users peek at
  // the information in the referenced document.
  string snippet = 3;

  // The document type of the document being referenced.
  bool document_is_folder = 4;

  // Output only. Time at which the document was last updated.
  google.protobuf.Timestamp update_time = 5
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the document was created.
  google.protobuf.Timestamp create_time = 6
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // Output only. Time at which the document was deleted.
  google.protobuf.Timestamp delete_time = 7
      [(google.api.field_behavior) = OUTPUT_ONLY];

  // The document is a folder with a retention policy.
  bool document_is_retention_folder = 8;

  // The document is a folder with a legal hold.
  bool document_is_legal_hold_folder = 9;
}
// A property of a document.
message Property {
  // Required. Must match the name of a PropertyDefinition in the
  // DocumentSchema.
  string name = 1 [(google.api.field_behavior) = REQUIRED];

  // Type of the property; must match the property_options type of the
  // matching PropertyDefinition.
  // Holds the value of the Property parsed into a specific data type. The
  // typed value(s) are obtained from Document AI's Property.mention_text
  // field.
  oneof values {
    // Integer values of the property.
    IntegerArray integer_values = 2;

    // Float values of the property.
    FloatArray float_values = 3;

    // String/text values of the property.
    TextArray text_values = 4;

    // Enum values of the property.
    EnumArray enum_values = 5;

    // Nested structured data values of the property.
    PropertyArray property_values = 6;

    // Date time values of the property.
    // Not supported by CMEK compliant deployment.
    DateTimeArray date_time_values = 7;

    // Map values of the property.
    MapProperty map_property = 8;

    // Timestamp values of the property.
    // Not supported by CMEK compliant deployment.
    TimestampArray timestamp_values = 9;
  }
}
// A list of integer values.
message IntegerArray {
  // The integer values.
  repeated int32 values = 1;
}
// A list of float values.
message FloatArray {
  // The float values.
  repeated float values = 1;
}
// A list of string/text values.
message TextArray {
  // The text values.
  repeated string values = 1;
}
// A list of enum values.
message EnumArray {
  // The enum values, carried as strings.
  repeated string values = 1;
}
// A list of DateTime values.
message DateTimeArray {
  // The datetime values.
  // OffsetDateTime and ZonedDateTime are both supported.
  repeated google.type.DateTime values = 1;
}
// A list of timestamp values.
message TimestampArray {
  // The timestamp values.
  repeated TimestampValue values = 1;
}
// The timestamp value type.
message TimestampValue {
  // The timestamp, in one of two representations. At most one member can be
  // set.
  oneof value {
    // The timestamp as a Timestamp message.
    google.protobuf.Timestamp timestamp_value = 1;

    // The timestamp as text. The string must represent a valid instant in
    // UTC and is parsed using
    // java.time.format.DateTimeFormatter.ISO_INSTANT,
    // e.g. "2013-09-29T18:46:19Z".
    string text_value = 2;
  }
}
// A list of property values.
message PropertyArray {
  // The property values.
  repeated Property properties = 1;
}
// A map property value.
// Represents structured entries of key-value pairs, consisting of field
// names which map to dynamically typed values.
message MapProperty {
  // Unordered map from field name to a dynamically typed value.
  map<string, Value> fields = 1;
}
// `Value` represents a dynamically typed value which can be a float, an
// integer, a string, an enum, a datetime, a timestamp, or a boolean value.
// A producer of a value is expected to set one of these variants. Absence of
// any variant indicates an error.
message Value {
  // The kind of value.
  oneof kind {
    // A float value.
    float float_value = 1;

    // An integer value.
    int32 int_value = 2;

    // A string value.
    string string_value = 3;

    // An enum value.
    EnumValue enum_value = 4;

    // A datetime value.
    google.type.DateTime datetime_value = 5;

    // A timestamp value.
    TimestampValue timestamp_value = 6;

    // A boolean value.
    bool boolean_value = 7;
  }
}
// The string value of an enum field.
message EnumValue {
  // String value of the enum field. It must match the set of enums defined
  // in the document schema using EnumTypeOptions.
  string value = 1;
}
// The file format of a raw document, used when a raw document is supplied.
enum RawDocumentFileType {
  // No raw document was specified, or it is non-parsable.
  RAW_DOCUMENT_FILE_TYPE_UNSPECIFIED = 0;

  // Adobe PDF format.
  RAW_DOCUMENT_FILE_TYPE_PDF = 1;

  // Microsoft Word format.
  RAW_DOCUMENT_FILE_TYPE_DOCX = 2;

  // Microsoft Excel format.
  RAW_DOCUMENT_FILE_TYPE_XLSX = 3;

  // Microsoft PowerPoint format.
  RAW_DOCUMENT_FILE_TYPE_PPTX = 4;

  // UTF-8 encoded text format.
  RAW_DOCUMENT_FILE_TYPE_TEXT = 5;

  // TIFF or TIF image file format.
  RAW_DOCUMENT_FILE_TYPE_TIFF = 6;
}
// The content category, stored when a raw document or structured content is
// supplied.
enum ContentCategory {
  // No category was specified.
  CONTENT_CATEGORY_UNSPECIFIED = 0;

  // The content is an image.
  CONTENT_CATEGORY_IMAGE = 1;

  // The content is audio.
  CONTENT_CATEGORY_AUDIO = 2;

  // The content is video.
  CONTENT_CATEGORY_VIDEO = 3;
}