Skip to content

Commit

Permalink
Add DLP code samples for custom info types [(#1524)](GoogleCloudPlatform/python-docs-samples#1524)
Browse files Browse the repository at this point in the history

* Add custom info type samples to inspect_content.py

Use flags to indicate dictionary word lists and regex patterns, then parse them into custom info types.

* Make code compatible with python 2.7

* Add missing commas

* Remove bad import

* Add tests for custom info types

* Add info_types parameter to deid.py

* Update deid tests to use info_types parameter

* Fix indentation

* Add blank lines

* Share logic for building custom info types

* Fix line too long

* Fix typo.

* Revert "Fix typo."

This reverts commit b4ffea6eef1fc2ccd2a4f17adb6e9492e54f1b76, so that
the sharing of the custom info type logic can be reverted as well to
make the code samples more readable.

* Revert "Share logic for building custom info types"

This reverts commit 47fc04f74c77db3bd5397459cf9242dc11521c37. This makes
the code samples more readable.

* Switch from indexes to using enumerate.

* Updated help message for custom dictionaries.

* Fix enumerate syntax error.
  • Loading branch information
mwdaub authored and andrewsg committed Jul 3, 2018
1 parent 29f10f4 commit 68b5872
Show file tree
Hide file tree
Showing 4 changed files with 268 additions and 12 deletions.
39 changes: 33 additions & 6 deletions samples/snippets/deid.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@


# [START dlp_deidentify_masking]
def deidentify_with_mask(project, string, masking_character=None,
def deidentify_with_mask(project, string, info_types, masking_character=None,
number_to_mask=0):
"""Uses the Data Loss Prevention API to deidentify sensitive data in a
string by masking it with a character.
Expand All @@ -44,6 +44,11 @@ def deidentify_with_mask(project, string, masking_character=None,
# Convert the project id into a full resource id.
parent = dlp.project_path(project)

# Construct inspect configuration dictionary
inspect_config = {
'info_types': [{'name': info_type} for info_type in info_types]
}

# Construct deidentify configuration dictionary
deidentify_config = {
'info_type_transformations': {
Expand All @@ -65,15 +70,16 @@ def deidentify_with_mask(project, string, masking_character=None,

# Call the API
response = dlp.deidentify_content(
parent, deidentify_config=deidentify_config, item=item)
parent, inspect_config=inspect_config,
deidentify_config=deidentify_config, item=item)

# Print out the results.
print(response.item.value)
# [END dlp_deidentify_masking]


# [START dlp_deidentify_fpe]
def deidentify_with_fpe(project, string, alphabet=None,
def deidentify_with_fpe(project, string, info_types, alphabet=None,
surrogate_type=None, key_name=None, wrapped_key=None):
"""Uses the Data Loss Prevention API to deidentify sensitive data in a
string using Format Preserving Encryption (FPE).
Expand Down Expand Up @@ -127,6 +133,11 @@ def deidentify_with_fpe(project, string, alphabet=None,
'name': surrogate_type
}

# Construct inspect configuration dictionary
inspect_config = {
'info_types': [{'name': info_type} for info_type in info_types]
}

# Construct deidentify configuration dictionary
deidentify_config = {
'info_type_transformations': {
Expand All @@ -146,7 +157,8 @@ def deidentify_with_fpe(project, string, alphabet=None,

# Call the API
response = dlp.deidentify_content(
parent, deidentify_config=deidentify_config, item=item)
parent, inspect_config=inspect_config,
deidentify_config=deidentify_config, item=item)

# Print results
print(response.item.value)
Expand Down Expand Up @@ -404,6 +416,13 @@ def write_data(data):
'deid_mask',
help='Deidentify sensitive data in a string by masking it with a '
'character.')
mask_parser.add_argument(
'--info_types', action='append',
help='Strings representing info types to look for. A full list of '
'info categories and types is available from the API. Examples '
'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
'If unspecified, the three above examples will be used.',
default=['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS'])
mask_parser.add_argument(
'project',
help='The Google Cloud project id to use as a parent resource.')
Expand All @@ -423,6 +442,13 @@ def write_data(data):
'deid_fpe',
help='Deidentify sensitive data in a string using Format Preserving '
'Encryption (FPE).')
fpe_parser.add_argument(
'--info_types', action='append',
help='Strings representing info types to look for. A full list of '
'info categories and types is available from the API. Examples '
'include "FIRST_NAME", "LAST_NAME", "EMAIL_ADDRESS". '
'If unspecified, the three above examples will be used.',
default=['FIRST_NAME', 'LAST_NAME', 'EMAIL_ADDRESS'])
fpe_parser.add_argument(
'project',
help='The Google Cloud project id to use as a parent resource.')
Expand Down Expand Up @@ -532,11 +558,12 @@ def write_data(data):
args = parser.parse_args()

if args.content == 'deid_mask':
deidentify_with_mask(args.project, args.item,
deidentify_with_mask(args.project, args.item, args.info_types,
masking_character=args.masking_character,
number_to_mask=args.number_to_mask)
elif args.content == 'deid_fpe':
deidentify_with_fpe(args.project, args.item, alphabet=args.alphabet,
deidentify_with_fpe(args.project, args.item, args.info_types,
alphabet=args.alphabet,
wrapped_key=args.wrapped_key,
key_name=args.key_name,
surrogate_type=args.surrogate_type)
Expand Down
10 changes: 8 additions & 2 deletions samples/snippets/deid_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,8 @@ def tempdir():


def test_deidentify_with_mask(capsys):
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING)
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING,
['US_SOCIAL_SECURITY_NUMBER'])

out, _ = capsys.readouterr()
assert 'My SSN is *********' in out
Expand All @@ -60,14 +61,17 @@ def test_deidentify_with_mask_masking_character_specified(capsys):
deid.deidentify_with_mask(
GCLOUD_PROJECT,
HARMFUL_STRING,
['US_SOCIAL_SECURITY_NUMBER'],
masking_character='#')

out, _ = capsys.readouterr()
assert 'My SSN is #########' in out


def test_deidentify_with_mask_masking_number_specified(capsys):
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING, number_to_mask=7)
deid.deidentify_with_mask(GCLOUD_PROJECT, HARMFUL_STRING,
['US_SOCIAL_SECURITY_NUMBER'],
number_to_mask=7)

out, _ = capsys.readouterr()
assert 'My SSN is *******27' in out
Expand All @@ -77,6 +81,7 @@ def test_deidentify_with_fpe(capsys):
deid.deidentify_with_fpe(
GCLOUD_PROJECT,
HARMFUL_STRING,
['US_SOCIAL_SECURITY_NUMBER'],
alphabet='NUMERIC',
wrapped_key=WRAPPED_KEY,
key_name=KEY_NAME)
Expand All @@ -90,6 +95,7 @@ def test_deidentify_with_fpe_uses_surrogate_info_types(capsys):
deid.deidentify_with_fpe(
GCLOUD_PROJECT,
HARMFUL_STRING,
['US_SOCIAL_SECURITY_NUMBER'],
alphabet='NUMERIC',
wrapped_key=WRAPPED_KEY,
key_name=KEY_NAME,
Expand Down
Loading

0 comments on commit 68b5872

Please sign in to comment.