Skip to content
This repository has been archived by the owner on Dec 10, 2023. It is now read-only.

feat: new Bytes and File types POWERPOINT and EXCEL #355

Merged: 2 commits merged on Mar 25, 2022.
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 12 additions & 11 deletions google/cloud/dlp_v2/types/dlp.py
Original file line number Diff line number Diff line change
Expand Up @@ -189,9 +189,7 @@ class MatchingType(proto.Enum):


class ContentOption(proto.Enum):
r"""Options describing which parts of the provided content should
be scanned.
"""
r"""Deprecated and unused."""
CONTENT_UNSPECIFIED = 0
CONTENT_TEXT = 1
CONTENT_IMAGE = 2
Expand Down Expand Up @@ -368,22 +366,22 @@ class InspectConfig(proto.Message):
to learn more.
limits (google.cloud.dlp_v2.types.InspectConfig.FindingLimits):
Configuration to control the number of
findings returned.
findings returned. This is not used for data
profiling.
include_quote (bool):
When true, a contextual quote from the data
that triggered a finding is included in the
response; see Finding.quote.
response; see Finding.quote. This is not used
for data profiling.
exclude_info_types (bool):
When true, excludes type information of the
findings.
findings. This is not used for data profiling.
custom_info_types (Sequence[google.cloud.dlp_v2.types.CustomInfoType]):
CustomInfoTypes provided by the user. See
https://cloud.google.com/dlp/docs/creating-custom-infotypes
to learn more.
content_options (Sequence[google.cloud.dlp_v2.types.ContentOption]):
List of options defining data content to
scan. If empty, text, images, and other content
will be included.
Deprecated and unused.
rule_set (Sequence[google.cloud.dlp_v2.types.InspectionRuleSet]):
Set of rules to apply to the findings for
this InspectConfig. Exclusion rules, contained
Expand All @@ -393,8 +391,9 @@ class InspectConfig(proto.Message):
"""

class FindingLimits(proto.Message):
r"""Configuration to control the number of findings returned.
Cannot be set if de-identification is requested.
r"""Configuration to control the number of findings returned for
inspection. This is not used for de-identification or data
profiling.

Attributes:
max_findings_per_item (int):
Expand Down Expand Up @@ -476,6 +475,8 @@ class BytesType(proto.Enum):
TEXT_UTF8 = 5
WORD_DOCUMENT = 7
PDF = 8
POWERPOINT_DOCUMENT = 9
EXCEL_DOCUMENT = 10
AVRO = 11
CSV = 12
TSV = 13
Expand Down
43 changes: 19 additions & 24 deletions google/cloud/dlp_v2/types/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,8 @@ class FileType(proto.Enum):
AVRO = 7
CSV = 8
TSV = 9
POWERPOINT = 11
EXCEL = 12


class InfoType(proto.Message):
Expand Down Expand Up @@ -188,23 +190,20 @@ class Dictionary(proto.Message):
Plane <https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane>`__
will be replaced with whitespace when scanning for matches, so the
dictionary phrase "Sam Johnson" will match all three phrases "sam
johnson",
Plane](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane)
surrounding any match must be of a different type than the adjacent
characters within the word, so letters must be next to non-letters
and digits next to non-digits. For example, the dictionary word
"jen" will match the first three letters of the text "jen123" but
will return no matches for "jennifer".
johnson", "Sam, Johnson", and "Sam (Johnson)". Additionally, the
characters surrounding any match must be of a different type than
the adjacent characters within the word, so letters must be next to
non-letters and digits next to non-digits. For example, the
dictionary word "jen" will match the first three letters of the text
"jen123" but will return no matches for "jennifer".

Dictionary words containing a large number of characters that are
not letters or digits may result in unexpected findings because such
characters are treated as whitespace. The
`limits <https://cloud.google.com/dlp/limits>`__ page contains
details about the size limits of dictionaries. For dictionaries that
do not fit within these constraints, consider using
``LargeCustomDictionaryConfig`` in the
`limits <https://cloud.google.com/dlp/limits>`__ page contains
details about
``LargeCustomDictionaryConfig`` in the ``StoredInfoType`` API.

This message has `oneof`_ fields (mutually exclusive fields).
For each oneof, at most one member field can be set at the same time.
Expand Down Expand Up @@ -260,11 +259,9 @@ class Regex(proto.Message):
be found under the google/re2 repository on
GitHub.
group_indexes (Sequence[int]):
(https://github.com/google/re2/wiki/Syntax)
can be found under the The index of the submatch
to extract as findings. When not specified, the
entire match is returned. No more than 3 may be
included.
The index of the submatch to extract as
findings. When not specified, the entire match
is returned. No more than 3 may be included.
"""

pattern = proto.Field(proto.STRING, number=1,)
Expand All @@ -275,10 +272,10 @@ class SurrogateType(proto.Message):
such as
```CryptoReplaceFfxFpeConfig`` <https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig>`__.
These types of transformations are those that perform
pseudonymization, thereby producing a "surrogate" as
```CryptoReplaceFfxFpeConfig`` <https://cloud.google.com/dlp/docs/reference/rest/v2/organizations.deidentifyTemplates#cryptoreplaceffxfpeconfig>`__.
transformation such as ``surrogate_info_type``. This CustomInfoType
does not support the use of ``detection_rules``.
pseudonymization, thereby producing a "surrogate" as output. This
should be used in conjunction with a field on the transformation
such as ``surrogate_info_type``. This CustomInfoType does not
support the use of ``detection_rules``.

"""

Expand Down Expand Up @@ -527,11 +524,9 @@ class CloudStorageRegexFileSet(proto.Message):
guide can be found under the google/re2 repository on
GitHub.
exclude_regex (Sequence[str]):
`syntax <https://github.com/google/re2/wiki/Syntax>`__; a
guide can be found A list of regular expressions matching
file paths to exclude. All files in the bucket that match at
least one of these regular expressions will be excluded from
the scan.
A list of regular expressions matching file paths to
exclude. All files in the bucket that match at least one of
these regular expressions will be excluded from the scan.

Regular expressions use RE2
`syntax <https://github.com/google/re2/wiki/Syntax>`__; a
Expand Down