Skip to content
This repository has been archived by the owner on Dec 10, 2023. It is now read-only.

feat: InfoType categories were added to built-in infoTypes #409

Merged
merged 13 commits into from
Jul 16, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions google/cloud/dlp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -81,6 +81,7 @@
HybridInspectResponse,
HybridInspectStatistics,
ImageLocation,
InfoTypeCategory,
InfoTypeDescription,
InfoTypeStats,
InfoTypeSummary,
Expand Down Expand Up @@ -255,6 +256,7 @@
"HybridInspectResponse",
"HybridInspectStatistics",
"ImageLocation",
"InfoTypeCategory",
"InfoTypeDescription",
"InfoTypeStats",
"InfoTypeSummary",
Expand Down
2 changes: 2 additions & 0 deletions google/cloud/dlp_v2/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@
HybridInspectResponse,
HybridInspectStatistics,
ImageLocation,
InfoTypeCategory,
InfoTypeDescription,
InfoTypeStats,
InfoTypeSummary,
Expand Down Expand Up @@ -273,6 +274,7 @@
"HybridOptions",
"ImageLocation",
"InfoType",
"InfoTypeCategory",
"InfoTypeDescription",
"InfoTypeStats",
"InfoTypeSummary",
Expand Down
1 change: 1 addition & 0 deletions google/cloud/dlp_v2/services/dlp_service/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -549,6 +549,7 @@ def __init__(
quota_project_id=client_options.quota_project_id,
client_info=client_info,
always_use_jwt_access=True,
api_audience=client_options.api_audience,
)

def inspect_content(
Expand Down
16 changes: 11 additions & 5 deletions google/cloud/dlp_v2/services/dlp_service/transports/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ def __init__(
quota_project_id: Optional[str] = None,
client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO,
always_use_jwt_access: Optional[bool] = False,
api_audience: Optional[str] = None,
**kwargs,
) -> None:
"""Instantiate the transport.
Expand Down Expand Up @@ -82,11 +83,6 @@ def __init__(
be used for service account credentials.
"""

# Save the hostname. Default to port 443 (HTTPS) if none is specified.
if ":" not in host:
host += ":443"
self._host = host

scopes_kwargs = {"scopes": scopes, "default_scopes": self.AUTH_SCOPES}

# Save the scopes.
Expand All @@ -107,6 +103,11 @@ def __init__(
credentials, _ = google.auth.default(
**scopes_kwargs, quota_project_id=quota_project_id
)
# Don't apply audience if the credentials file was passed by the user.
if hasattr(credentials, "with_gdch_audience"):
credentials = credentials.with_gdch_audience(
api_audience if api_audience else host
)

# If the credentials are service account credentials, then always try to use self signed JWT.
if (
Expand All @@ -119,6 +120,11 @@ def __init__(
# Save the credentials.
self._credentials = credentials

# Save the hostname. Default to port 443 (HTTPS) if none is specified.
if ":" not in host:
host += ":443"
self._host = host

def _prep_wrapped_messages(self, client_info):
# Precompute the wrapped methods.
self._wrapped_methods = {
Expand Down
2 changes: 2 additions & 0 deletions google/cloud/dlp_v2/services/dlp_service/transports/grpc.py
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ def __init__(
quota_project_id: Optional[str] = None,
client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO,
always_use_jwt_access: Optional[bool] = False,
api_audience: Optional[str] = None,
) -> None:
"""Instantiate the transport.

Expand Down Expand Up @@ -162,6 +163,7 @@ def __init__(
quota_project_id=quota_project_id,
client_info=client_info,
always_use_jwt_access=always_use_jwt_access,
api_audience=api_audience,
)

if not self._grpc_channel:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ def __init__(
quota_project_id=None,
client_info: gapic_v1.client_info.ClientInfo = DEFAULT_CLIENT_INFO,
always_use_jwt_access: Optional[bool] = False,
api_audience: Optional[str] = None,
) -> None:
"""Instantiate the transport.

Expand Down Expand Up @@ -207,6 +208,7 @@ def __init__(
quota_project_id=quota_project_id,
client_info=client_info,
always_use_jwt_access=always_use_jwt_access,
api_audience=api_audience,
)

if not self._grpc_channel:
Expand Down
2 changes: 2 additions & 0 deletions google/cloud/dlp_v2/types/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@
HybridInspectResponse,
HybridInspectStatistics,
ImageLocation,
InfoTypeCategory,
InfoTypeDescription,
InfoTypeStats,
InfoTypeSummary,
Expand Down Expand Up @@ -250,6 +251,7 @@
"HybridInspectResponse",
"HybridInspectStatistics",
"ImageLocation",
"InfoTypeCategory",
"InfoTypeDescription",
"InfoTypeStats",
"InfoTypeSummary",
Expand Down
133 changes: 129 additions & 4 deletions google/cloud/dlp_v2/types/dlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@
"InspectDataSourceDetails",
"HybridInspectStatistics",
"InfoTypeDescription",
"InfoTypeCategory",
"ListInfoTypesRequest",
"ListInfoTypesResponse",
"RiskAnalysisJobConfig",
Expand Down Expand Up @@ -423,10 +424,10 @@ class InspectConfig(proto.Message):
findings returned. This is not used for data
profiling.
include_quote (bool):
When true, a contextual quote from the data
that triggered a finding is included in the
response; see Finding.quote. This is not used
for data profiling.
When true, a contextual quote from the data that triggered a
finding is included in the response; see
[Finding.quote][google.privacy.dlp.v2.Finding.quote]. This
is not used for data profiling.
exclude_info_types (bool):
When true, excludes type information of the
findings. This is not used for data profiling.
Expand Down Expand Up @@ -1907,6 +1908,8 @@ class InfoTypeDescription(proto.Message):
description (str):
Description of the infotype. Translated when
language is provided in the request.
categories (Sequence[google.cloud.dlp_v2.types.InfoTypeCategory]):
The category of the infoType.
"""

name = proto.Field(
Expand All @@ -1926,6 +1929,128 @@ class InfoTypeDescription(proto.Message):
proto.STRING,
number=4,
)
categories = proto.RepeatedField(
proto.MESSAGE,
number=10,
message="InfoTypeCategory",
)


class InfoTypeCategory(proto.Message):
    r"""A category label used to organize infoTypes.

    InfoTypes are classified along three axes: geographic location,
    industry, and data type. The three fields below all belong to the
    ``category`` oneof, so a single message holds a value for at most
    one of those axes.

    This message has `oneof`_ fields (mutually exclusive fields).
    For each oneof, at most one member field can be set at the same time.
    Setting any member of the oneof automatically clears all other
    members.

    .. _oneof: https://proto-plus-python.readthedocs.io/en/stable/fields.html#oneofs-mutually-exclusive-fields

    Attributes:
        location_category (google.cloud.dlp_v2.types.InfoTypeCategory.LocationCategory):
            The region or country that issued the ID or
            document represented by the infoType.

            This field is a member of `oneof`_ ``category``.
        industry_category (google.cloud.dlp_v2.types.InfoTypeCategory.IndustryCategory):
            The group of relevant businesses where this
            infoType is commonly used.

            This field is a member of `oneof`_ ``category``.
        type_category (google.cloud.dlp_v2.types.InfoTypeCategory.TypeCategory):
            The class of identifiers where this infoType
            belongs.

            This field is a member of `oneof`_ ``category``.
    """

    class LocationCategory(proto.Enum):
        r"""Locations an infoType can be categorized under.

        The list reflects the locations currently defined; more may be
        added in the future.
        """
        LOCATION_UNSPECIFIED = 0
        GLOBAL = 1
        ARGENTINA = 2
        AUSTRALIA = 3
        BELGIUM = 4
        BRAZIL = 5
        CANADA = 6
        CHILE = 7
        CHINA = 8
        COLOMBIA = 9
        DENMARK = 10
        FRANCE = 11
        FINLAND = 12
        GERMANY = 13
        HONG_KONG = 14
        INDIA = 15
        INDONESIA = 16
        IRELAND = 17
        ISRAEL = 18
        ITALY = 19
        JAPAN = 20
        KOREA = 21
        MEXICO = 22
        THE_NETHERLANDS = 23
        NORWAY = 24
        PARAGUAY = 25
        PERU = 26
        POLAND = 27
        PORTUGAL = 28
        SINGAPORE = 29
        SOUTH_AFRICA = 30
        SPAIN = 31
        SWEDEN = 32
        TAIWAN = 33
        THAILAND = 34
        TURKEY = 35
        UNITED_KINGDOM = 36
        UNITED_STATES = 37
        URUGUAY = 38
        VENEZUELA = 39
        INTERNAL = 40

    class IndustryCategory(proto.Enum):
        r"""Industries an infoType can be categorized under.

        The list reflects the industries currently defined; more may be
        added in the future.
        """
        INDUSTRY_UNSPECIFIED = 0
        FINANCE = 1
        HEALTH = 2
        TELECOMMUNICATIONS = 3

    class TypeCategory(proto.Enum):
        r"""Data-type classes an infoType can be categorized under.

        The list reflects the types currently defined; more may be
        added in the future.
        """
        TYPE_UNSPECIFIED = 0
        PII = 1
        SPII = 2
        DEMOGRAPHIC = 3
        CREDENTIAL = 4
        GOVERNMENT_ID = 5
        DOCUMENT = 6
        CONTEXTUAL_INFORMATION = 7

    # All three fields share the "category" oneof: assigning one of them
    # clears whichever of the others was previously set.
    location_category = proto.Field(
        proto.ENUM,
        enum=LocationCategory,
        oneof="category",
        number=1,
    )
    industry_category = proto.Field(
        proto.ENUM,
        enum=IndustryCategory,
        oneof="category",
        number=2,
    )
    type_category = proto.Field(
        proto.ENUM,
        enum=TypeCategory,
        oneof="category",
        number=3,
    )


class ListInfoTypesRequest(proto.Message):
Expand Down
34 changes: 20 additions & 14 deletions google/cloud/dlp_v2/types/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -324,8 +324,12 @@ class Proximity(proto.Message):

Attributes:
window_before (int):
Number of characters before the finding to
consider.
Number of characters before the finding to consider. For
tabular data, if you want to modify the likelihood of an
entire column of findings, set this to 1. For more
information, see [Hotword example: Set the match likelihood
of a table column]
(https://cloud.google.com/dlp/docs/creating-custom-infotypes-likelihood#match-column-values).
window_after (int):
Number of characters after the finding to
consider.
Expand Down Expand Up @@ -392,18 +396,20 @@ class HotwordRule(proto.Message):
Regular expression pattern defining what
qualifies as a hotword.
proximity (google.cloud.dlp_v2.types.CustomInfoType.DetectionRule.Proximity):
Proximity of the finding within which the
entire hotword must reside. The total length of
the window cannot exceed 1000 characters. Note
that the finding itself will be included in the
window, so that hotwords may be used to match
substrings of the finding itself. For example,
the certainty of a phone number regex "\(\d{3}\)
\d{3}-\d{4}" could be adjusted upwards if the
area code is known to be the local area code of
a company office using the hotword regex
"\(xxx\)", where "xxx" is the area code in
question.
Range of characters within which the entire hotword must
reside. The total length of the window cannot exceed 1000
characters. The finding itself will be included in the
window, so that hotwords can be used to match substrings of
the finding itself. Suppose you want Cloud DLP to promote
the likelihood of the phone number regex "\(\d{3}\)
\d{3}-\d{4}" if the area code is known to be the area code
of a company's office. In this case, use the hotword regex
"\(xxx\)", where "xxx" is the area code in question.

For tabular data, if you want to modify the likelihood of an
entire column of findings, see [Hotword example: Set the
match likelihood of a table column]
(https://cloud.google.com/dlp/docs/creating-custom-infotypes-likelihood#match-column-values).
likelihood_adjustment (google.cloud.dlp_v2.types.CustomInfoType.DetectionRule.LikelihoodAdjustment):
Likelihood adjustment to apply to all
matching findings.
Expand Down
5 changes: 1 addition & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,7 @@
version = "3.7.1"
release_status = "Development Status :: 5 - Production/Stable"
dependencies = [
# NOTE: Maintainers, please do not require google-api-core>=2.x.x
# Until this issue is closed
# https://github.com/googleapis/google-cloud-python/issues/10566
"google-api-core[grpc] >= 1.31.5, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.0",
"google-api-core[grpc] >= 1.32.0, <3.0.0dev,!=2.0.*,!=2.1.*,!=2.2.*,!=2.3.*,!=2.4.*,!=2.5.*,!=2.6.*,!=2.7.*",
"proto-plus >= 1.15.0, <2.0.0dev",
"protobuf >= 3.19.0, <4.0.0dev",
]
Expand Down
11 changes: 0 additions & 11 deletions testing/constraints-3.6.txt

This file was deleted.

2 changes: 1 addition & 1 deletion testing/constraints-3.7.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
#
# e.g., if setup.py has "foo >= 1.14.0, < 2.0.0dev",
# Then this file should have foo==1.14.0
google-api-core==1.31.5
google-api-core==1.32.0
proto-plus==1.15.0
libcst==0.2.5
protobuf==3.19.0
Loading