From 99fd8be66d7b04e7f73c801be1b4d49867e3b7e1 Mon Sep 17 00:00:00 2001 From: Google APIs Date: Fri, 8 Apr 2022 09:38:07 -0700 Subject: [PATCH] docs: update taxonomy display_name comment feat: added Dataplex specific fields PiperOrigin-RevId: 440386238 --- google/cloud/datacatalog/v1/BUILD.bazel | 2 + google/cloud/datacatalog/v1/common.proto | 3 + google/cloud/datacatalog/v1/data_source.proto | 36 ++++++++ google/cloud/datacatalog/v1/datacatalog.proto | 30 ++++++- .../cloud/datacatalog/v1/dataplex_spec.proto | 90 +++++++++++++++++++ .../datacatalog/v1/physical_schema.proto | 83 +++++++++++++++++ .../datacatalog/v1/policytagmanager.proto | 2 + .../v1/policytagmanagerserialization.proto | 1 - google/cloud/datacatalog/v1/tags.proto | 2 +- 9 files changed, 243 insertions(+), 6 deletions(-) create mode 100644 google/cloud/datacatalog/v1/dataplex_spec.proto create mode 100644 google/cloud/datacatalog/v1/physical_schema.proto diff --git a/google/cloud/datacatalog/v1/BUILD.bazel b/google/cloud/datacatalog/v1/BUILD.bazel index 6c6fb7655edd1..8348bcf9e8d8e 100644 --- a/google/cloud/datacatalog/v1/BUILD.bazel +++ b/google/cloud/datacatalog/v1/BUILD.bazel @@ -25,7 +25,9 @@ proto_library( "common.proto", "data_source.proto", "datacatalog.proto", + "dataplex_spec.proto", "gcs_fileset_spec.proto", + "physical_schema.proto", "policytagmanager.proto", "policytagmanagerserialization.proto", "schema.proto", diff --git a/google/cloud/datacatalog/v1/common.proto b/google/cloud/datacatalog/v1/common.proto index a7fe9c97c2bd9..7a6bef5f2223e 100644 --- a/google/cloud/datacatalog/v1/common.proto +++ b/google/cloud/datacatalog/v1/common.proto @@ -48,4 +48,7 @@ enum IntegratedSystem { // Dataproc Metastore. DATAPROC_METASTORE = 3; + + // Dataplex. 
+ DATAPLEX = 4; } diff --git a/google/cloud/datacatalog/v1/data_source.proto b/google/cloud/datacatalog/v1/data_source.proto index 13a2c4f3a160b..a73ff09c7567b 100644 --- a/google/cloud/datacatalog/v1/data_source.proto +++ b/google/cloud/datacatalog/v1/data_source.proto @@ -48,4 +48,40 @@ message DataSource { // // `//bigquery.googleapis.com/projects/{PROJECT_ID}/locations/{LOCATION}/datasets/{DATASET_ID}/tables/{TABLE_ID}` string resource = 2; + + // Output only. Data Catalog entry name, if applicable. + string source_entry = 3 [(google.api.field_behavior) = OUTPUT_ONLY]; + + oneof properties { + // Detailed properties of the underlying storage. + StorageProperties storage_properties = 4; + } +} + +// Details the properties of the underlying storage. +message StorageProperties { + // Patterns to identify a set of files for this fileset. + // + // Examples of a valid `file_pattern`: + // + // * `gs://bucket_name/dir/*`: matches all files in the `bucket_name/dir` + // directory + // * `gs://bucket_name/dir/**`: matches all files in the `bucket_name/dir` + // and all subdirectories recursively + // * `gs://bucket_name/file*`: matches files prefixed by `file` in + // `bucket_name` + // * `gs://bucket_name/??.txt`: matches files with two characters followed by + // `.txt` in `bucket_name` + // * `gs://bucket_name/[aeiou].txt`: matches files that contain a single + // vowel character followed by `.txt` in + // `bucket_name` + // * `gs://bucket_name/[a-m].txt`: matches files that contain `a`, `b`, ... + // or `m` followed by `.txt` in `bucket_name` + // * `gs://bucket_name/a/*/b`: matches all files in `bucket_name` that match + // the `a/*/b` pattern, such as `a/c/b`, `a/d/b` + // * `gs://another_bucket/a.txt`: matches `gs://another_bucket/a.txt` + repeated string file_pattern = 1; + + // File type in MIME format, for example, `text/plain`. 
+ string file_type = 2; } diff --git a/google/cloud/datacatalog/v1/datacatalog.proto b/google/cloud/datacatalog/v1/datacatalog.proto index 820682bca8000..b31eed89371c6 100644 --- a/google/cloud/datacatalog/v1/datacatalog.proto +++ b/google/cloud/datacatalog/v1/datacatalog.proto @@ -23,6 +23,7 @@ import "google/api/resource.proto"; import "google/cloud/datacatalog/v1/bigquery.proto"; import "google/cloud/datacatalog/v1/common.proto"; import "google/cloud/datacatalog/v1/data_source.proto"; +import "google/cloud/datacatalog/v1/dataplex_spec.proto"; import "google/cloud/datacatalog/v1/gcs_fileset_spec.proto"; import "google/cloud/datacatalog/v1/schema.proto"; import "google/cloud/datacatalog/v1/search.proto"; @@ -34,7 +35,6 @@ import "google/iam/v1/iam_policy.proto"; import "google/iam/v1/policy.proto"; import "google/protobuf/empty.proto"; import "google/protobuf/field_mask.proto"; -import "google/protobuf/timestamp.proto"; option cc_enable_arenas = true; option csharp_namespace = "Google.Cloud.DataCatalog.V1"; @@ -1065,6 +1065,10 @@ message Entry { // Specification that applies to a user-defined function or procedure. Valid // only for entries with the `ROUTINE` type. RoutineSpec routine_spec = 28; + + // Specification that applies to a fileset resource. Valid only + // for entries with the `FILESET` type. + FilesetSpec fileset_spec = 33; } // Display name of an entry. @@ -1085,7 +1089,7 @@ message Entry { // Default value is an empty string. string description = 4; - // Business Context of the entry. Not supported for BigQuery datasets. + // Business Context of the entry. Not supported for BigQuery datasets BusinessContext business_context = 37; // Schema of the entry. An entry might not have any schema attached to it. @@ -1133,6 +1137,18 @@ message DatabaseTableSpec { // Type of this table. TableType type = 1; + + // Fields specific to a Dataplex table and present only in the Dataplex table + // entries. 
+ DataplexTableSpec dataplex_table = 2; +} + +// Specification that applies to a fileset. Valid only for entries with the +// 'FILESET' type. +message FilesetSpec { + // Fields specific to a Dataplex fileset and present only in the Dataplex + // fileset entries. + DataplexFilesetSpec dataplex_fileset = 1; } // Specification that applies to a data source connection. Valid only for @@ -1241,8 +1257,8 @@ message Contacts { // Designation of the person, for example, Data Steward. string designation = 1; - // Email of the person in the format of `john.doe@example.com`, - // `<john.doe@example.com>`, or `John Doe<john.doe@example.com>`. + // Email of the person in the format of `john.doe@xyz`, + // `<john.doe@xyz>`, or `John Doe<john.doe@xyz>`. string email = 2; } @@ -1681,6 +1697,12 @@ enum EntryType { // Output only. Routine, for example, a BigQuery routine. ROUTINE = 9; + // A Dataplex lake. + LAKE = 10; + + // A Dataplex zone. + ZONE = 11; + // A service, for example, a Dataproc Metastore service. SERVICE = 14; } diff --git a/google/cloud/datacatalog/v1/dataplex_spec.proto b/google/cloud/datacatalog/v1/dataplex_spec.proto new file mode 100644 index 0000000000000..0cdba3e1f8c67 --- /dev/null +++ b/google/cloud/datacatalog/v1/dataplex_spec.proto @@ -0,0 +1,90 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +syntax = "proto3"; + +package google.cloud.datacatalog.v1; + +import "google/cloud/datacatalog/v1/common.proto"; +import "google/cloud/datacatalog/v1/physical_schema.proto"; + +option cc_enable_arenas = true; +option csharp_namespace = "Google.Cloud.DataCatalog.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/datacatalog/v1;datacatalog"; +option java_multiple_files = true; +option java_outer_classname = "DataplexSpecProto"; +option java_package = "com.google.cloud.datacatalog.v1"; +option php_namespace = "Google\\Cloud\\DataCatalog\\V1"; +option ruby_package = "Google::Cloud::DataCatalog::V1"; + +// Common Dataplex fields. +message DataplexSpec { + // Fully qualified resource name of an asset in Dataplex, to which the + // underlying data source (Cloud Storage bucket or BigQuery dataset) of the + // entity is attached. + string asset = 1; + + // Format of the data. + PhysicalSchema data_format = 2; + + // Compression format of the data, e.g., zip, gzip etc. + string compression_format = 3; + + // Project ID of the underlying Cloud Storage or BigQuery data. Note that + // this may not be the same project as the corresponding Dataplex lake / + // zone / asset. + string project_id = 4; +} + +// Entry specification for a Dataplex fileset. +message DataplexFilesetSpec { + // Common Dataplex fields. + DataplexSpec dataplex_spec = 1; +} + +// Entry specification for a Dataplex table. +message DataplexTableSpec { + // List of external tables registered by Dataplex in other systems based on + // the same underlying data. + // + // External tables allow querying this data in those systems. + repeated DataplexExternalTable external_tables = 1; + + // Common Dataplex fields. + DataplexSpec dataplex_spec = 2; + + // Indicates if the table schema is managed by the user or not. + bool user_managed = 3; +} + +// External table registered by Dataplex.
+// Dataplex publishes data discovered from an asset into multiple other systems +// (BigQuery, DPMS) in form of tables. We call them "external tables". External +// tables are also synced into the Data Catalog. +// This message contains pointers to +// those external tables (fully qualified name, resource name et cetera) within +// the Data Catalog. +message DataplexExternalTable { + // Service in which the external table is registered. + IntegratedSystem system = 1; + + // Fully qualified name (FQN) of the external table. + string fully_qualified_name = 28; + + // Google Cloud resource name of the external table. + string google_cloud_resource = 3; + + // Name of the Data Catalog entry representing the external table. + string data_catalog_entry = 4; +} diff --git a/google/cloud/datacatalog/v1/physical_schema.proto b/google/cloud/datacatalog/v1/physical_schema.proto new file mode 100644 index 0000000000000..414e95ca7f277 --- /dev/null +++ b/google/cloud/datacatalog/v1/physical_schema.proto @@ -0,0 +1,83 @@ +// Copyright 2022 Google LLC +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +syntax = "proto3"; + +package google.cloud.datacatalog.v1; + +option cc_enable_arenas = true; +option csharp_namespace = "Google.Cloud.DataCatalog.V1"; +option go_package = "google.golang.org/genproto/googleapis/cloud/datacatalog/v1;datacatalog"; +option java_multiple_files = true; +option java_outer_classname = "PhysicalSchemaProto"; +option java_package = "com.google.cloud.datacatalog.v1"; +option php_namespace = "Google\\Cloud\\DataCatalog\\V1"; +option ruby_package = "Google::Cloud::DataCatalog::V1"; + +// Native schema used by a resource represented as an entry. Used by query +// engines for deserializing and parsing source data. +message PhysicalSchema { + // Schema in Avro JSON format. + message AvroSchema { + // JSON source of the Avro schema. + string text = 1; + } + + // Schema in Thrift format. + message ThriftSchema { + // Thrift IDL source of the schema. + string text = 1; + } + + // Schema in protocol buffer format. + message ProtobufSchema { + // Protocol buffer source of the schema. + string text = 1; + } + + // Marks a Parquet-encoded data source. + message ParquetSchema { + + } + + // Marks an ORC-encoded data source. + message OrcSchema { + + } + + // Marks a CSV-encoded data source. + message CsvSchema { + + } + + oneof schema { + // Schema in Avro JSON format. + AvroSchema avro = 1; + + // Schema in Thrift format. + ThriftSchema thrift = 2; + + // Schema in protocol buffer format. + ProtobufSchema protobuf = 3; + + // Marks a Parquet-encoded data source. + ParquetSchema parquet = 4; + + // Marks an ORC-encoded data source. + OrcSchema orc = 5; + + // Marks a CSV-encoded data source. 
+ CsvSchema csv = 6; + } +} diff --git a/google/cloud/datacatalog/v1/policytagmanager.proto b/google/cloud/datacatalog/v1/policytagmanager.proto index fa1ae6f8ee2b8..be415988404bf 100644 --- a/google/cloud/datacatalog/v1/policytagmanager.proto +++ b/google/cloud/datacatalog/v1/policytagmanager.proto @@ -229,6 +229,8 @@ message Taxonomy { // The name can't start or end with spaces, must contain only Unicode letters, // numbers, underscores, dashes, and spaces, and be at most 200 bytes long // when encoded in UTF-8. + // + // The taxonomy display name must be unique within an organization. string display_name = 2 [(google.api.field_behavior) = REQUIRED]; // Optional. Description of this taxonomy. If not set, defaults to empty. diff --git a/google/cloud/datacatalog/v1/policytagmanagerserialization.proto b/google/cloud/datacatalog/v1/policytagmanagerserialization.proto index b741539b71317..a4fe4d8b9c9c0 100644 --- a/google/cloud/datacatalog/v1/policytagmanagerserialization.proto +++ b/google/cloud/datacatalog/v1/policytagmanagerserialization.proto @@ -21,7 +21,6 @@ import "google/api/client.proto"; import "google/api/field_behavior.proto"; import "google/api/resource.proto"; import "google/cloud/datacatalog/v1/policytagmanager.proto"; -import "google/iam/v1/policy.proto"; option cc_enable_arenas = true; option csharp_namespace = "Google.Cloud.DataCatalog.V1"; diff --git a/google/cloud/datacatalog/v1/tags.proto b/google/cloud/datacatalog/v1/tags.proto index efe8e8092ff05..dedfd2f61f100 100644 --- a/google/cloud/datacatalog/v1/tags.proto +++ b/google/cloud/datacatalog/v1/tags.proto @@ -164,7 +164,7 @@ message TagTemplate { // [ListTags][google.cloud.datacatalog.v1.ListTags] API response. // // Additionally, you can search for a public tag by value with a - // simple search query instead of using a ``tag:`` predicate. + // simple search query in addition to using a ``tag:`` predicate. bool is_publicly_readable = 5; // Required. 
Map of tag template field IDs to the settings for the field.