In [0]:
%python
from databricks.sdk import WorkspaceClient

# Workspace API client, constructed with the SDK's default configuration.
ws = WorkspaceClient()

# Notebook parameters:
#   catalog - dropdown populated with the full name of every catalog the SDK lists
#             (the first one returned is the default selection)
#   schema  - free-text name of the schema that will hold the masking functions
catalogs = [catalog_info.full_name for catalog_info in ws.catalogs.list()]
dbutils.widgets.dropdown("catalog", defaultValue=catalogs[0], choices=catalogs)
dbutils.widgets.text("schema", defaultValue="system")

In [0]:
-- Make sure the schema selected via the notebook widgets exists; a no-op when it already does.
CREATE SCHEMA IF NOT EXISTS IDENTIFIER(:catalog || '.' || :schema)

## Create the column mask functions

In [0]:
-- Column mask that discards the input entirely and returns the constant '[REDACTED]'.
CREATE OR REPLACE FUNCTION IDENTIFIER(CONCAT(:catalog, '.', :schema, '.redact_string'))(col STRING)
RETURNS STRING
RETURN '[REDACTED]'

In [0]:
-- Column mask for names: ignores the input and returns the constant '[REDACTED_NAME]'.
CREATE OR REPLACE FUNCTION IDENTIFIER(CONCAT(:catalog, '.', :schema, '.redact_name'))(col STRING)
RETURNS STRING
RETURN '[REDACTED_NAME]'

In [0]:
-- Column mask for e-mail addresses: removes the local part and the '@' so that only the
-- domain remains visible.
-- The local part is matched as any run of characters that are neither '@' nor whitespace,
-- so addresses whose local part contains characters such as '+' or '%' are redacted in full
-- (a narrower character class would leave everything before the '+' in clear).
-- col is already declared STRING, so no cast is needed.
CREATE OR REPLACE FUNCTION IDENTIFIER(:catalog || '.' || :schema || '.redact_email')(col STRING)
RETURNS STRING
RETURN regexp_replace(col, '[^@\\s]+@', '')

In [0]:
-- Column mask for IP addresses.
--   * Dotted-quad IPv4 (four groups of 1-3 digits; octet ranges are not validated):
--     keep the first three groups and append '.0/24'.
--   * Fully expanded IPv6 (eight colon-separated groups of 1-4 hex digits):
--     replace the last two groups with 'XXXX:XXXX'.
--   * Anything else (including compressed IPv6 forms): return '[REDACTED_IP_ADDRESS]'.
CREATE OR REPLACE FUNCTION IDENTIFIER(CONCAT(:catalog, '.', :schema, '.redact_ip_address'))(col STRING)
RETURNS STRING
RETURN
CASE
    WHEN col RLIKE '^([0-9]{1,3}\\.){3}[0-9]{1,3}$'
        THEN concat(substring_index(col, '.', 3), '.0/24')
    WHEN col RLIKE '^([0-9a-fA-F]{1,4}:){7}[0-9a-fA-F]{1,4}$'
        THEN regexp_replace(col, '([^:]*):([^:]*)$', 'XXXX:XXXX')
    ELSE '[REDACTED_IP_ADDRESS]'
END

In [0]:
-- Column mask that keeps the first two and last two characters of the value and replaces
-- every upper-case letter, lower-case letter and digit in between with 'X'.
-- The masked middle section starts at position 3 and spans length(col) - 4 characters, so
-- it does not overlap the retained head and tail and the output has the same length as
-- the input.
-- Values of four characters or fewer have no middle section; they are masked in full,
-- because keeping head and tail would return the whole value in clear.
CREATE OR REPLACE FUNCTION IDENTIFIER(:catalog || '.' || :schema || '.redact_substring')(col STRING)
RETURNS STRING
RETURN
CASE
    WHEN length(col) <= 4
        THEN mask(col, upperChar => 'X', lowerChar => 'X', digitChar => 'X')
    ELSE concat(
        substr(col, 1, 2),
        mask(substr(col, 3, length(col) - 4), upperChar => 'X', lowerChar => 'X', digitChar => 'X'),
        substr(col, -2, 2)
    )
END

In [0]:
-- Column mask that keeps the last three characters of the value and replaces every
-- upper-case letter, lower-case letter and digit before them with 'X'.
-- NOTE(review): values of three characters or fewer look like they are returned
-- unmasked, since the leading section is then empty - confirm whether that matters.
CREATE OR REPLACE FUNCTION IDENTIFIER(CONCAT(:catalog, '.', :schema, '.redact_endstring'))(col STRING)
RETURNS STRING
RETURN concat(
    mask(substr(col, 0, length(col) - 3), upperChar => 'X', lowerChar => 'X', digitChar => 'X'),
    substr(col, -3, 3)
)

In [0]:
-- Column mask that replaces every digit in the value with '*'.
-- NOTE(review): only digitChar is overridden, so mask()'s defaults for upper- and
-- lower-case letters presumably still apply and letters are replaced too - confirm
-- against the mask() documentation if letters are expected in these columns.
CREATE OR REPLACE FUNCTION IDENTIFIER(CONCAT(:catalog, '.', :schema, '.redact_digits'))(col STRING)
RETURNS STRING
RETURN mask(col, digitChar => '*')

In [0]:
-- Column mask that applies mask() to the value with all of its default replacement characters.
CREATE OR REPLACE FUNCTION IDENTIFIER(CONCAT(:catalog, '.', :schema, '.mask_string'))(col STRING)
RETURNS STRING
RETURN mask(col)

## Create the ABAC policies

In [0]:
%python
# Read the widget selections and derive the name of the group that is exempt from masking.
catalog = dbutils.widgets.get('catalog')
schema = dbutils.widgets.get('schema')
pii_viewer = f"{catalog}.pii_viewer"

# Take the first group whose display name matches exactly; stop with an error when
# there is none, because the policies below reference the group by name.
group = next(
    filter(lambda candidate: candidate.display_name == pii_viewer, ws.groups.list()),
    None,
)
if group is None:
    raise Exception(f"{pii_viewer} group does not exist. Please create it as an account level group before running the steps below!")

In [0]:
%python
# Catalog-wide column-mask policy: columns tagged 'class.name' are passed through the
# redact_name function for `account users`, except members of the pii_viewer group.
# CREATE OR REPLACE keeps the cell re-runnable, and the function name is back-quoted
# the same way as the catalog in ON CATALOG so unusual catalog/schema names still parse.
sql(f"""
    CREATE OR REPLACE POLICY redact_name
    ON CATALOG `{catalog}`
    COMMENT 'Redact names by replacing the PII with [REDACTED_NAME]'
    COLUMN MASK `{catalog}`.`{schema}`.redact_name
    TO `account users`
    EXCEPT `{pii_viewer}`
    FOR TABLES
    MATCH COLUMNS
        hasTag('class.name') AS name
    ON COLUMN name;
    """)

In [0]:
%python
# Catalog-wide column-mask policy: columns tagged 'class.email_address' are passed through
# the redact_email function for `account users`, except members of the pii_viewer group.
# CREATE OR REPLACE keeps the cell re-runnable, and the function name is back-quoted
# the same way as the catalog in ON CATALOG so unusual catalog/schema names still parse.
sql(f"""
    CREATE OR REPLACE POLICY redact_email
    ON CATALOG `{catalog}`
    COMMENT 'Redact email addresses so that only the domain is visible'
    COLUMN MASK `{catalog}`.`{schema}`.redact_email
    TO `account users`
    EXCEPT `{pii_viewer}`
    FOR TABLES
    MATCH COLUMNS
        hasTag('class.email_address') AS email
    ON COLUMN email;
    """)

In [0]:
%python
# Catalog-wide column-mask policy: columns tagged 'class.phone_number' are passed through
# the redact_digits function for `account users`, except members of the pii_viewer group.
# CREATE OR REPLACE keeps the cell re-runnable, and the function name is back-quoted
# the same way as the catalog in ON CATALOG so unusual catalog/schema names still parse.
sql(f"""
    CREATE OR REPLACE POLICY redact_phone_number
    ON CATALOG `{catalog}`
    COMMENT 'Redact US phone numbers by replacing all digits with "*"'
    COLUMN MASK `{catalog}`.`{schema}`.redact_digits
    TO `account users`
    EXCEPT `{pii_viewer}`
    FOR TABLES
    MATCH COLUMNS
        hasTag('class.phone_number') AS phone
    ON COLUMN phone;
    """)

In [0]:
%python
# Catalog-wide column-mask policy: columns tagged 'class.ip_address' are passed through
# the redact_ip_address function for `account users`, except members of the pii_viewer group.
# CREATE OR REPLACE keeps the cell re-runnable, and the function name is back-quoted
# the same way as the catalog in ON CATALOG so unusual catalog/schema names still parse.
sql(f"""
    CREATE OR REPLACE POLICY redact_ip_address
    ON CATALOG `{catalog}`
    COMMENT 'Redact IP v4 and v6 addresses via truncation of the last few octets'
    COLUMN MASK `{catalog}`.`{schema}`.redact_ip_address
    TO `account users`
    EXCEPT `{pii_viewer}`
    FOR TABLES
    MATCH COLUMNS
        hasTag('class.ip_address') AS ip
    ON COLUMN ip;
    """)

In [0]:
%python
# Catalog-wide column-mask policy: columns tagged 'class.location' are passed through
# the mask_string function for `account users`, except members of the pii_viewer group.
# CREATE OR REPLACE keeps the cell re-runnable, and the function name is back-quoted
# the same way as the catalog in ON CATALOG so unusual catalog/schema names still parse.
sql(f"""
    CREATE OR REPLACE POLICY redact_location
    ON CATALOG `{catalog}`
    COMMENT 'Redact addresses and other locations by masking upper, lower and numeric characters'
    COLUMN MASK `{catalog}`.`{schema}`.mask_string
    TO `account users`
    EXCEPT `{pii_viewer}`
    FOR TABLES
    MATCH COLUMNS
        hasTag('class.location') AS location
    ON COLUMN location;
    """)

In [0]:
%python
# Catalog-wide column-mask policy: columns tagged 'class.us_bank_number' are passed through
# the redact_substring function for `account users`, except members of the pii_viewer group.
# CREATE OR REPLACE keeps the cell re-runnable, and the function name is back-quoted
# the same way as the catalog in ON CATALOG so unusual catalog/schema names still parse.
sql(f"""
    CREATE OR REPLACE POLICY redact_bank_number
    ON CATALOG `{catalog}`
    COMMENT 'Redact US bank numbers by masking the substring whilst retaining the first and last 2 characters'
    COLUMN MASK `{catalog}`.`{schema}`.redact_substring
    TO `account users`
    EXCEPT `{pii_viewer}`
    FOR TABLES
    MATCH COLUMNS
        hasTag('class.us_bank_number') AS bank_number
    ON COLUMN bank_number;
    """)

In [0]:
%python
# Catalog-wide column-mask policy: columns tagged 'class.us_ssn' are passed through
# the redact_digits function for `account users`, except members of the pii_viewer group.
# CREATE OR REPLACE keeps the cell re-runnable, and the function name is back-quoted
# the same way as the catalog in ON CATALOG so unusual catalog/schema names still parse.
sql(f"""
    CREATE OR REPLACE POLICY redact_ssn
    ON CATALOG `{catalog}`
    COMMENT 'Redact US SSNs by replacing all digits with "*"'
    COLUMN MASK `{catalog}`.`{schema}`.redact_digits
    TO `account users`
    EXCEPT `{pii_viewer}`
    FOR TABLES
    MATCH COLUMNS
        hasTag('class.us_ssn') AS ssn
    ON COLUMN ssn;
    """)

In [0]:
%python
# Catalog-wide column-mask policy: columns tagged 'class.us_itin' are passed through
# the redact_digits function for `account users`, except members of the pii_viewer group.
# CREATE OR REPLACE keeps the cell re-runnable, and the function name is back-quoted
# the same way as the catalog in ON CATALOG so unusual catalog/schema names still parse.
# The matched column is aliased `itin` so the policy text names the data it covers.
sql(f"""
    CREATE OR REPLACE POLICY redact_itin
    ON CATALOG `{catalog}`
    COMMENT 'Redact US ITINs by replacing all digits with "*"'
    COLUMN MASK `{catalog}`.`{schema}`.redact_digits
    TO `account users`
    EXCEPT `{pii_viewer}`
    FOR TABLES
    MATCH COLUMNS
        hasTag('class.us_itin') AS itin
    ON COLUMN itin;
    """)

In [0]:
%python
# Catalog-wide column-mask policy: columns tagged 'class.iban_code' are passed through
# the redact_substring function for `account users`, except members of the pii_viewer group.
# CREATE OR REPLACE keeps the cell re-runnable, and the function name is back-quoted
# the same way as the catalog in ON CATALOG so unusual catalog/schema names still parse.
sql(f"""
    CREATE OR REPLACE POLICY redact_iban
    ON CATALOG `{catalog}`
    COMMENT 'Redact IBAN codes by masking the substring whilst retaining the first and last 2 characters'
    COLUMN MASK `{catalog}`.`{schema}`.redact_substring
    TO `account users`
    EXCEPT `{pii_viewer}`
    FOR TABLES
    MATCH COLUMNS
        hasTag('class.iban_code') AS iban
    ON COLUMN iban;
    """)

In [0]:
%python
# Catalog-wide column-mask policy: columns tagged 'class.credit_card' are passed through
# the redact_endstring function for `account users`, except members of the pii_viewer group.
# CREATE OR REPLACE keeps the cell re-runnable, and the function name is back-quoted
# the same way as the catalog in ON CATALOG so unusual catalog/schema names still parse.
sql(f"""
    CREATE OR REPLACE POLICY redact_credit_card
    ON CATALOG `{catalog}`
    COMMENT 'Redact credit card numbers codes by masking the substring whilst retaining the last 3 characters. NB - this is used for example purposes, credit card numbers should be tokenised or otherwise appropriately protected!'
    COLUMN MASK `{catalog}`.`{schema}`.redact_endstring
    TO `account users`
    EXCEPT `{pii_viewer}`
    FOR TABLES
    MATCH COLUMNS
        hasTag('class.credit_card') AS cc
    ON COLUMN cc;
    """)