In [None]:
# mark cell_id=gretel_transform_deps

In [None]:
# mark: cell_id=gretel_transform_boilerplate

# Run the project-local notebook setup hook from `helpers`.
# NOTE(review): presumably prepares the environment the later cells rely on — confirm in helpers.
from helpers import setup_notebook
setup_notebook()

# All Transformers

In [None]:
import hashlib
import json
from urllib.request import urlopen

# Import the transformers module and show its public names (`__all__`).
# As the last expression of the cell, the value is rendered as the cell output.
import gretel_client.transformers.transformers
gretel_client.transformers.transformers.__all__

In [None]:
# Download the sample records used by every demo cell below.
# The context manager closes the HTTP response as soon as the payload is read,
# so the connection is not left open for the lifetime of the kernel.
with urlopen(
    'https://gretel-public-website.s3.us-west-2.amazonaws.com/datasets/notebooks/sample_transformers_data.json'
) as sample_data:
    data = json.loads(sample_data.read())

# Hex digest of a SHA-256 hash (256 bits), reused as the secret throughout the demo.
demo_passphrase = "my demo secret"
sha256 = hashlib.sha256(demo_passphrase.encode("utf-8")).hexdigest()

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import BucketConfig, BucketRange

# Bucketing demo: sort the 'Country' field into the two alphabetical
# buckets A-L and M-Z. The trailing '*' path has no transform attached.
country_buckets = BucketRange([('A', 'L'), ('M', 'Z')], labels=['A-L', 'M-Z'])
data_paths = [
    DataPath(input='Country', xforms=[BucketConfig(bucket_range=country_buckets)]),
    DataPath(input='*'),
]
xf = DataTransformPipeline(data_paths)

# Show the same record before and after the pipeline is applied.
first_record = data[0]
stream_table_view(first_record, title='Original')
stream_table_view(first_record, xf=xf, title='Transformed', title_color='red')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_restore_pipeline import DataRestorePipeline
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import SecureFpeConfig

# Encrypt and then decrypt anything labeled as a US zip code (radix=10, i.e. base 10).
zip_fpe = SecureFpeConfig(labels=['us_zip_code'], secret=sha256, radix=10)

# The same path definitions feed both the transform and the restore pipeline.
data_paths = [DataPath(input='*', xforms=[zip_fpe])]
xf, rf = DataTransformPipeline(data_paths), DataRestorePipeline(data_paths)

first_record = data[0]
transformed = xf.transform_record(first_record)
stream_table_view(first_record, title='Original')
stream_table_view(first_record, xf=xf, title='Transformed', title_color='red')

# Run the transformed record through the restore pipeline and display it.
restored = rf.transform_record(transformed)
stream_table_view(restored, title='Restored', title_color='green')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_restore_pipeline import DataRestorePipeline
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import SecureFpeConfig

# Encrypt and then decrypt the 'Name' field.
# radix=62 (base62) preserves upper-/lower-case.
name_fpe = SecureFpeConfig(secret=sha256, radix=62)

data_paths = [
    DataPath(input='Name', xforms=[name_fpe]),
    DataPath(input='*'),
]
xf, rf = DataTransformPipeline(data_paths), DataRestorePipeline(data_paths)

first_record = data[0]
transformed = xf.transform_record(first_record)
stream_table_view(first_record, title='Original')
stream_table_view(first_record, xf=xf, title='Transformed', title_color='red')

# Run the transformed record through the restore pipeline and display it.
restored = rf.transform_record(transformed)
stream_table_view(restored, title='Restored', title_color='green')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_restore_pipeline import DataRestorePipeline
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import DateShiftConfig

# Shift the 'Date' field (range parameters: -100 to +100 days),
# then return it to the original date with the restore pipeline.
date_shift = DateShiftConfig(
    secret=sha256,
    lower_range_days=-100,
    upper_range_days=100,
)

data_paths = [
    DataPath(input='Date', xforms=[date_shift]),
    DataPath(input='*'),
]
xf, rf = DataTransformPipeline(data_paths), DataRestorePipeline(data_paths)

first_record = data[0]
transformed = xf.transform_record(first_record)
stream_table_view(first_record, title='Original')
stream_table_view(first_record, xf=xf, title='Transformed', title_color='red')

# Run the transformed record through the restore pipeline and display it.
restored = rf.transform_record(transformed)
stream_table_view(restored, title='Restored', title_color='green')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_restore_pipeline import DataRestorePipeline
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import SecureFpeConfig

# Encrypt and then decrypt anything labeled as longitude/latitude,
# past the 1st fraction digit (float_precision=1).
geo_fpe = SecureFpeConfig(
    labels=['longitude', 'latitude'],
    secret=sha256,
    radix=10,
    float_precision=1,
)

data_paths = [DataPath(input='*', xforms=[geo_fpe])]
xf, rf = DataTransformPipeline(data_paths), DataRestorePipeline(data_paths)

first_record = data[0]
transformed = xf.transform_record(first_record)
stream_table_view(first_record, title='Original')
stream_table_view(first_record, xf=xf, title='Transformed', title_color='red')

# Run the transformed record through the restore pipeline and display it.
restored = rf.transform_record(transformed)
stream_table_view(restored, title='Restored', title_color='green')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import CombineConfig, DropConfig

# Combine City, Zipcode and Country into a single 'location' field, and drop
# the standalone Zipcode and Country fields from the output record.
location_parts = FieldRef(['Zipcode', 'Country'])
xf_combine = CombineConfig(combine=location_parts, separator=", ")
xf_drop = DropConfig()

# 'City' is the input the other fields are combined into; the two dropped
# fields share one DropConfig; '*' comes last with no transform attached.
data_paths = [DataPath(input='City', xforms=xf_combine, output='location')]
data_paths += [
    DataPath(input=dropped_field, xforms=xf_drop)
    for dropped_field in ('Zipcode', 'Country')
]
data_paths.append(DataPath(input='*'))

xf = DataTransformPipeline(data_paths)

first_record = data[0]
stream_table_view(first_record, title='Original')
stream_table_view(first_record, xf=xf, title='Transformed', title_color='red')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import DropConfig

# Drop the 'Credit Card' field completely; the '*' path has no transform attached.
drop_field = DropConfig()
data_paths = [DataPath(input='Credit Card', xforms=[drop_field]), DataPath(input='*')]

xf = DataTransformPipeline(data_paths)

first_record = data[0]
stream_table_view(first_record, title='Original')
stream_table_view(first_record, xf=xf, title='Transformed', title_color='red')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import FakeConstantConfig

# Replace all values labeled latitude or longitude with constant dummy coordinates,
# and replace the 'Name' field with faked American and Mexican names.
# Both configs use the same seed.
shared_seed = 8675309
fake_loc = FakeConstantConfig(seed=shared_seed, labels=['latitude', 'longitude'])
fake_name = FakeConstantConfig(
    seed=shared_seed,
    fake_method='name',
    locales=['en-US', 'es-MX'],
)

data_paths = [DataPath(input='Name', xforms=fake_name)]
data_paths.append(DataPath(input='*', xforms=fake_loc))

xf = DataTransformPipeline(data_paths)

first_record = data[0]
stream_table_view(first_record, title='Original')
stream_table_view(first_record, xf=xf, title='Transformed', title_color='red')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import RedactWithLabelConfig

# Redact anything deemed a location (label 'location') with its label, across all fields.
redact_location = RedactWithLabelConfig(labels=['location'])
data_paths = [DataPath(input='*', xforms=[redact_location])]

xf = DataTransformPipeline(data_paths)

first_record = data[0]
stream_table_view(first_record, title='Original')
stream_table_view(first_record, xf=xf, title='Transformed', title_color='red')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import RedactWithCharConfig, RedactWithStringConfig

# Redact customer IDs by crossing each character out, and customer names by
# replacing them with our own string: "CUSTOMER_NAME".
xf_redact_char = [RedactWithCharConfig()]
xf_redact_string = [RedactWithStringConfig(string='CUSTOMER_NAME')]

# Pair each targeted field with its redaction; '*' comes last with no transform attached.
field_redactions = (('Customer ID', xf_redact_char), ('Name', xf_redact_string))
data_paths = [
    DataPath(input=field_name, xforms=redaction)
    for field_name, redaction in field_redactions
]
data_paths.append(DataPath(input='*'))

xf = DataTransformPipeline(data_paths)

first_record = data[0]
stream_table_view(first_record, title='Original')
stream_table_view(first_record, xf=xf, title='Transformed', title_color='red')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import SecureHashConfig

# Here we are replacing the customer IDs with a hash value.
xf_hash = SecureHashConfig(secret='MY_SECRET')

data_paths = [
    DataPath(input='Customer ID', xforms=xf_hash),
    DataPath(input='*')
]

xf = DataTransformPipeline(data_paths)

# Title both views, as the other demo cells do, so the original and the
# transformed tables can be told apart in the output.
stream_table_view(data[0], title='Original')
stream_table_view(data[0], xf=xf, title='Transformed', title_color='red')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_restore_pipeline import DataRestorePipeline
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import SecureFpeConfig, DateShiftConfig

# Let's encrypt and decrypt credit card numbers, customer IDs, names, dates, zip codes
# and lat-lon values on the first 5 records.
NUM_RECORDS = 5

# One base-10 FPE config is shared by the 'Credit Card' and 'Customer ID' fields.
xf_cc_cid_fpe = SecureFpeConfig(secret=sha256, radix=10)
xf_numbers_fpe = SecureFpeConfig(
    labels=['us_zip_code', 'longitude', 'latitude'], secret=sha256, radix=10)
xf_name_fpe = SecureFpeConfig(secret=sha256, radix=62)
xf_date = DateShiftConfig(secret=sha256,
                          lower_range_days=-100,
                          upper_range_days=100)

data_paths = [
    DataPath(input='Credit Card', xforms=xf_cc_cid_fpe),
    DataPath(input='Customer ID', xforms=xf_cc_cid_fpe),
    DataPath(input='Name', xforms=xf_name_fpe),
    DataPath(input='Date', xforms=xf_date),
    DataPath(input='*', xforms=xf_numbers_fpe)
]

# The same path definitions feed both the transform and the restore pipeline.
xf = DataTransformPipeline(data_paths)
rf = DataRestorePipeline(data_paths)

# Slicing caps the demo at exactly NUM_RECORDS records. A manual counter checked
# with `index > 5` after incrementing would display six. Shorter data is fine too.
for record in data[:NUM_RECORDS]:
    # keep the transformed record so it can be restored below
    transformed = xf.transform_record(record)
    stream_table_view(record, clear=True, title='Original')
    stream_table_view(record, xf=xf, title='Transformed', title_color='red')

    # now let's take the transformations out of the transformed record
    restored = rf.transform_record(transformed)
    stream_table_view(restored,
                      sleep=2.0,
                      title='Restored',
                      title_color='green')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import RedactWithLabelConfig, FakeConstantConfig, SecureHashConfig, \
    BucketConfig, BucketRange, DropConfig, DateShiftConfig

# Number of records to stream through the demo pipeline.
NUM_RECORDS = 5

# Applied to every remaining field via the '*' path: replace credit card numbers
# with their label, and fake anything labeled latitude/longitude.
xf_entities = [
    RedactWithLabelConfig(labels=['credit_card_number']),
    FakeConstantConfig(seed=8675309, labels=['latitude', 'longitude'])
]

# fake names from several different locales.
xf_name = FakeConstantConfig(
    seed=8675309,
    fake_method='name',
    locales=['en-US', 'es-MX', 'tr-TR', 'fr-FR', 'ja-JP'])

# Secure Hash the customer ID
xf_hash = SecureHashConfig(secret='SECRET_KEY')

# Drop zipcode field
xf_drop = DropConfig()

# bucketize country names
bucket_range = BucketRange([('A', 'L'), ('M', 'Z')], labels=['A-L', 'M-Z'])
xf_bucket = BucketConfig(bucket_range=bucket_range)

# Shift the date, using the 'Customer ID' field as the tweak so the shift varies per customer ID
xf_date = DateShiftConfig(secret=sha256,
                          lower_range_days=-100,
                          upper_range_days=100,
                          tweak=FieldRef('Customer ID'))

data_paths = [
    DataPath(input='Customer ID', xforms=xf_hash),
    # 'Name' uses the fake-name config built above, not the customer ID hash.
    DataPath(input='Name', xforms=xf_name),
    DataPath(input='Zipcode', xforms=xf_drop),
    DataPath(input='Country', xforms=xf_bucket),
    DataPath(input='Date', xforms=xf_date),
    DataPath(input='*', xforms=xf_entities)
]

xf = DataTransformPipeline(data_paths)

# Slicing caps the demo at exactly NUM_RECORDS records. A manual counter checked
# with `index > 5` after incrementing would display six.
for record in data[:NUM_RECORDS]:
    stream_table_view(record, clear=True, title='Original')
    stream_table_view(record,
                      xf=xf,
                      sleep=1.0,
                      title='Transformed',
                      title_color='red')

In [None]:
from gretel_helpers.streaming_view import stream_table_view
from gretel_client.transformers.base import FieldRef
from gretel_client.transformers.data_transform_pipeline import DataTransformPipeline, DataPath
from gretel_client.transformers.transformers import SecureFpeConfig, ConditionalConfig, RedactWithCharConfig

# Let's replace any customer ID where the field Name's first letter matches A-L or a-l
# with format-preserving encryption, or redact it character by character otherwise.
NUM_RECORDS = 5

low_letters_regex = r'\b[A-La-l]'
xf_fpe = SecureFpeConfig(
    secret="2B7E151628AED2A6ABF7158809CF4F3CEF4359D8D580AA4F7F036D6F04FC6A94",
    radix=10)

# The condition is evaluated against 'Name', while the chosen transform is
# applied to the path's own input field ('Customer ID').
xf_low_letter_names_hide_cid = ConditionalConfig(
    conditional_value=FieldRef('Name'),
    regex=low_letters_regex,
    true_xform=xf_fpe,
    false_xform=RedactWithCharConfig())

data_paths_encrypt = [
    DataPath(input='Customer ID', xforms=xf_low_letter_names_hide_cid),
    DataPath(input='*')
]
xf = DataTransformPipeline(data_paths_encrypt)

# Slicing caps the demo at exactly NUM_RECORDS records. A manual counter checked
# with `index > 5` after incrementing would display six.
for record in data[:NUM_RECORDS]:
    stream_table_view(record, clear=True, title='Original')
    stream_table_view(record,
                      xf=xf,
                      sleep=2.0,
                      title='Transformed',
                      title_color='red')