In [None]:
# | default_exp sanitizer

In [None]:
# | export

import builtins
import logging
import re
import sys
from logging import Logger
from typing import *

import IPython
from airt.logger import get_logger
from airt.patching import patch

In [None]:
import random
import string

In [None]:
def generate_random_string(n: int = 20, include_punctuation: bool = False):
    """Return a random string of ``n`` characters for use as a fake secret.

    Args:
        n: Number of characters to draw.
        include_punctuation: When true, a fixed subset of punctuation
            characters is added to the uppercase letters and digits that
            are always available.

    Returns:
        The generated string (empty when ``n`` is 0).
    """
    # Subset of string.punctuation: the single quote, parentheses and slash are left out.
    extra = '!"#$%&*+,-.:;<=>?@[\\]^_`{|}~' if include_punctuation else ""
    alphabet = string.ascii_uppercase + string.digits + extra
    picked = random.choices(alphabet, k=n)
    return "".join(picked)

In [None]:
# Default call: 20 characters drawn from uppercase letters and digits only.
generate_random_string()

'BB2W6PZ28UDFRKF9ZN8Q'

In [None]:
# 36 characters, this time with the punctuation subset added to the alphabet.
generate_random_string(n=36, include_punctuation=True)

'5;18T|]}R*38=.64AU0"}F`;`"T,XK=L86|V'

In [None]:
# Full punctuation set, for comparison with the subset hard-coded in generate_random_string.
string.punctuation

'!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~'

In [None]:
# | export


def _build_secret_patterns() -> "List[Tuple[re.Pattern, str]]":
    """Build the ordered (compiled regex, replacement text) rules for ``sanitize_secrets``.

    The base rules are written for Python-repr style text such as
    ``'KEY': 'value'``. Variants are then derived from them, in this order:
    double-quoted spellings, lower-case spellings, and ``NAME='value'``
    assignment spellings. Rule order is significant because the rules are
    applied one after another to the same text.
    """
    # Raw strings keep regex escapes (\w, \s) from being parsed as invalid string escapes.
    rules = {
        r"(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)\w+": "*" * 20,
        r"AWS_ACCESS_KEY_ID':\s*'.*?'": "AWS_ACCESS_KEY_ID': " + f"'{'*' * 20}'",
        r"AWSAccessKeyId':\s*'.*?'": "AWSAccessKeyId': " + f"'{'*' * 20}'",
        r"KEY':\s*'.*?'": "KEY': " + f"'{'*' * 40}'",
        r"KEY\s*=\s*'.*?'": "KEY = " + f"'{'*' * 40}'",
        r"AZURE_SUBSCRIPTION_ID':\s*'.*?'": "AZURE_SUBSCRIPTION_ID': "
        + f"'{'*' * 36}'",
        r"AZURE_TENANT_ID':\s*'.*?'": "AZURE_TENANT_ID': " + f"'{'*' * 36}'",
        r"AZURE_CLIENT_ID':\s*'.*?'": "AZURE_CLIENT_ID': " + f"'{'*' * 36}'",
        r"SECRET':\s*'.*?'": "SECRET': " + f"'{'*' * 40}'",
        r"PASSWORD':\s*'.*?'": "PASSWORD': " + f"'{'*' * 40}'",
        r"POLICY':\s*'.*?'": "POLICY': " + f"'{'*' * 252}'",
        r"SIGNATURE':\s*'.*?'": "SIGNATURE': " + f"'{'*' * 28}'",
        # Credentials embedded in a URI (scheme://user:password@host).
        r"://.*@": "://" + "*" * 40 + "@",
        r"value':\s*'[a-zA-Z0-9]{8}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{4}-[a-zA-Z0-9]{12}'": f"value': '{'*'*8}-{'*'*4}-{'*'*4}-{'*'*4}-{'*'*12}'",
        r"[a-zA-Z0-9]{5}~[a-zA-Z0-9-]{34}": f"{'*'*40}",
    }

    # Each derived dict is fully built before ``update`` runs, so the table is
    # never mutated while it is being iterated.

    # Double-quoted spellings of every rule that mentions a single quote.
    rules.update(
        {
            k.replace("'", '"'): v.replace("'", '"')
            for k, v in rules.items()
            if "'" in k
        }
    )

    # Lower-case spellings of every rule collected so far.
    rules.update({k.lower(): v.lower() for k, v in rules.items()})

    # Assignment spellings: NAME='value' is rewritten as NAME = '<mask>'.
    rules.update(
        {
            k.replace("':", "="): v.replace("':", " =")
            for k, v in rules.items()
            if "':" in k
        }
    )

    # Let every single quote in a pattern also match its backslash-escaped
    # form, which shows up when the text is the repr of a string.
    return [
        (re.compile(k.replace("'", r"\\?'")), v) for k, v in rules.items()
    ]


# The rules never change, so build and compile them once instead of on every call.
_SECRET_PATTERNS = _build_secret_patterns()


def sanitize_secrets(s: str) -> str:
    """Return ``s`` with recognised secret values replaced by asterisks.

    Args:
        s: Text to sanitize. Any non-string object is converted with
            ``repr()`` first.

    Returns:
        The text after every redaction rule has been applied in order.
        Masks have a fixed length per rule, so they do not reveal the
        length of the original value.
    """
    if not isinstance(s, str):
        # repr() also works for class objects, where calling s.__repr__() raises TypeError.
        s = repr(s)

    for pattern, replacement in _SECRET_PATTERNS:
        s = pattern.sub(replacement, s)

    return s

In [None]:
test_string = (
    """[INFO] airt_service.batch_job_components.fastapi: FastAPIBatchJobContext.create_job(self=FastAPIBatchJobContext(task=csv_processing), command='process_csv 80 30 PersonId \'["OccurredTime"]\' --blocksize 256MB --kwargs_json \'{"usecols": [0, 1, 2, 3, 4], "parse_dates": ["OccurredTime"]}\' --deduplicate_data', environment_vars={'AWS_ACCESS_KEY_ID': '"""
    + generate_random_string(20)
    + """', 'AWS_SECRET_ACCESS_KEY': '"""
    + generate_random_string(36)
    + """', 'AWS_DEFAULT_REGION': 'eu-west-1', 'AZURE_SUBSCRIPTION_ID': '"""
    + f"{generate_random_string(8)}-{generate_random_string(4)}-{generate_random_string(4)}-{generate_random_string(4)}-{generate_random_string(12)}"
    + """', 'AZURE_TENANT_ID': '"""
    + f"{generate_random_string(8)}-{generate_random_string(4)}-{generate_random_string(4)}-{generate_random_string(4)}-{generate_random_string(12)}"
    + """', 'AZURE_CLIENT_ID': '"""
    + f"{generate_random_string(8)}-{generate_random_string(4)}-{generate_random_string(4)}-{generate_random_string(4)}-{generate_random_string(12)}"
    + """', 'AZURE_CLIENT_SECRET': '"""
    + generate_random_string(40, include_punctuation=True)
    + """', 'AZURE_STORAGE_ACCOUNT_PREFIX': 'kumsairtsdev', 'AZURE_RESOURCE_GROUP': 'kumaran-airt-service-dev', 'AIRT_SERVICE_SUPER_USER_PASSWORD': '"""
    + generate_random_string(26)
    + """', 'AIRT_TOKEN_SECRET_KEY': 'asdflkjoemnnaaserniuhenbcloieenfase', 'STORAGE_BUCKET_PREFIX': 'kumaran-airt-service', 'DB_USERNAME': 'root', 'DB_PASSWORD': 'SuperSecretPassword', 'DB_HOST': 'kumaran-mysql', 'DB_PORT': '3306', 'DB_DATABASE': 'airt_service', 'DB_DATABASE_SERVER': 'mysql'})"""
)
print(test_string)
expected = """[INFO] airt_service.batch_job_components.fastapi: FastAPIBatchJobContext.create_job(self=FastAPIBatchJobContext(task=csv_processing), command='process_csv 80 30 PersonId \'["OccurredTime"]\' --blocksize 256MB --kwargs_json \'{"usecols": [0, 1, 2, 3, 4], "parse_dates": ["OccurredTime"]}\' --deduplicate_data', environment_vars={'AWS_ACCESS_KEY_ID': '********************', 'AWS_SECRET_ACCESS_KEY': '****************************************', 'AWS_DEFAULT_REGION': 'eu-west-1', 'AZURE_SUBSCRIPTION_ID': '************************************', 'AZURE_TENANT_ID': '************************************', 'AZURE_CLIENT_ID': '************************************', 'AZURE_CLIENT_SECRET': '****************************************', 'AZURE_STORAGE_ACCOUNT_PREFIX': 'kumsairtsdev', 'AZURE_RESOURCE_GROUP': 'kumaran-airt-service-dev', 'AIRT_SERVICE_SUPER_USER_PASSWORD': '****************************************', 'AIRT_TOKEN_SECRET_KEY': '****************************************', 'STORAGE_BUCKET_PREFIX': 'kumaran-airt-service', 'DB_USERNAME': 'root', 'DB_PASSWORD': '****************************************', 'DB_HOST': 'kumaran-mysql', 'DB_PORT': '3306', 'DB_DATABASE': 'airt_service', 'DB_DATABASE_SERVER': 'mysql'})"""
# Compare the complete strings: slicing both sides with n = -1 left the final
# character out of the check.
actual = sanitize_secrets(test_string)
print(actual)
print(expected)
assert actual == expected

[INFO] airt_service.batch_job_components.fastapi: FastAPIBatchJobContext.create_job(self=FastAPIBatchJobContext(task=csv_processing), command='process_csv 80 30 PersonId '["OccurredTime"]' --blocksize 256MB --kwargs_json '{"usecols": [0, 1, 2, 3, 4], "parse_dates": ["OccurredTime"]}' --deduplicate_data', environment_vars={'AWS_ACCESS_KEY_ID': '1WNAY3Y5D1H2J8RG9RO1', 'AWS_SECRET_ACCESS_KEY': 'UHNAMZZJN4FFVQ1W8BSZU4VB8B2FSWJKWEZ8', 'AWS_DEFAULT_REGION': 'eu-west-1', 'AZURE_SUBSCRIPTION_ID': '0UG6WBS5-OF5W-X4NC-SEXF-MWTAWW6A1LAQ', 'AZURE_TENANT_ID': 'V28OBCPX-32VS-87R9-TZUN-FV5Y6FD1N7RF', 'AZURE_CLIENT_ID': '39AFB0E8-5U3Z-OXK9-U70W-S12WGN5K93WV', 'AZURE_CLIENT_SECRET': '-~9&_50N:{4UOKN{T"MLWK7B+VS146Y+,18%@SR2', 'AZURE_STORAGE_ACCOUNT_PREFIX': 'kumsairtsdev', 'AZURE_RESOURCE_GROUP': 'kumaran-airt-service-dev', 'AIRT_SERVICE_SUPER_USER_PASSWORD': 'F781RRC2THVQV8W87MKGBHTMB2', 'AIRT_TOKEN_SECRET_KEY': 'asdflkjoemnnaaserniuhenbcloieenfase', 'STORAGE_BUCKET_PREFIX': 'kumaran-airt-service', 'D

In [None]:
test = (
    f"""--env AWS_SECRET_ACCESS_KEY=\'{generate_random_string(40)}\' --env AWS_DEFAULT_REGION=\'eu-west-1\' --env AZURE_SUBSCRIPTION_ID=\'{generate_random_string(8)}-{generate_random_string(4)}-{generate_random_string(4)}-{generate_random_string(4)}-{generate_random_string(12)}\' """
    + f"""--env AZURE_TENANT_ID=\'{generate_random_string(8)}-{generate_random_string(4)}-{generate_random_string(4)}-{generate_random_string(4)}-{generate_random_string(12)}\' --env AZURE_CLIENT_ID=\'{generate_random_string(8)}-{generate_random_string(4)}-{generate_random_string(4)}-{generate_random_string(4)}-{generate_random_string(12)}\' """
    + f"""--env AZURE_CLIENT_SECRET=\'{generate_random_string(40, include_punctuation=True)}\' --env AZURE_STORAGE_ACCOUNT_PREFIX=\'kumsairtsdev\' --env AZURE_RESOURCE_GROUP=\'kumaran-airt-service-dev\' --env STORAGE_BUCKET_PREFIX=\'kumaran-airt-service\' --env DB_USERNAME=\'root\' --env DB_PASSWORD=\'SuperSecretPassword\' --env DB_HOST=\'kumaran-mysql\' --env DB_PORT=\'3306\' --env DB_DATABASE=\'airt_service\' --env DB_DATABASE_SERVER=\'mysql\'"""
)
expected = """--env AWS_SECRET_ACCESS_KEY = '****************************************' --env AWS_DEFAULT_REGION='eu-west-1' --env AZURE_SUBSCRIPTION_ID = '************************************' --env AZURE_TENANT_ID = '************************************' --env AZURE_CLIENT_ID = '************************************' --env AZURE_CLIENT_SECRET = '****************************************' --env AZURE_STORAGE_ACCOUNT_PREFIX='kumsairtsdev' --env AZURE_RESOURCE_GROUP='kumaran-airt-service-dev' --env STORAGE_BUCKET_PREFIX='kumaran-airt-service' --env DB_USERNAME='root' --env DB_PASSWORD = '****************************************' --env DB_HOST='kumaran-mysql' --env DB_PORT='3306' --env DB_DATABASE='airt_service' --env DB_DATABASE_SERVER='mysql'"""
# Redact the command-line style sample and check it against the expected text.
actual = sanitize_secrets(test)
print(actual, expected, sep="\n")
assert expected == actual

--env AWS_SECRET_ACCESS_KEY = '****************************************' --env AWS_DEFAULT_REGION='eu-west-1' --env AZURE_SUBSCRIPTION_ID = '************************************' --env AZURE_TENANT_ID = '************************************' --env AZURE_CLIENT_ID = '************************************' --env AZURE_CLIENT_SECRET = '****************************************' --env AZURE_STORAGE_ACCOUNT_PREFIX='kumsairtsdev' --env AZURE_RESOURCE_GROUP='kumaran-airt-service-dev' --env STORAGE_BUCKET_PREFIX='kumaran-airt-service' --env DB_USERNAME='root' --env DB_PASSWORD = '****************************************' --env DB_HOST='kumaran-mysql' --env DB_PORT='3306' --env DB_DATABASE='airt_service' --env DB_DATABASE_SERVER='mysql'
--env AWS_SECRET_ACCESS_KEY = '****************************************' --env AWS_DEFAULT_REGION='eu-west-1' --env AZURE_SUBSCRIPTION_ID = '************************************' --env AZURE_TENANT_ID = '************************************' --env AZURE_CLIENT_ID 

In [None]:
test = f"DataBlob(id=185, uuid=UUID('d90014da-a38b-4b38-a26e-24849d5c0879'), type='s3', uri='s3://{generate_random_string(20)}:{generate_random_string(40)}@bucket', source='s3://bucket', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.aws: 'aws'>, region='eu-west-1', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 4, 59, 49), user_id=34, pulled_on=None, tags=[Tag(name='new_tag', id=9, created=datetime.datetime(2022, 10, 18, 12, 30, 43), uuid=UUID('1662ea89-5018-4728-a71a-ecc502a22fd7'))])"
expected = "DataBlob(id=185, uuid=UUID('d90014da-a38b-4b38-a26e-24849d5c0879'), type='s3', uri='s3://****************************************@bucket', source='s3://bucket', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.aws: 'aws'>, region='eu-west-1', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 4, 59, 49), user_id=34, pulled_on=None, tags=[Tag(name='new_tag', id=9, created=datetime.datetime(2022, 10, 18, 12, 30, 43), uuid=UUID('1662ea89-5018-4728-a71a-ecc502a22fd7'))])"
# Compare the complete strings: slicing both sides with n = -1 left the final
# character out of the check.
actual = sanitize_secrets(test)
print(actual)
print(expected)
assert actual == expected

DataBlob(id=185, uuid=UUID('d90014da-a38b-4b38-a26e-24849d5c0879'), type='s3', uri='s3://****************************************@bucket', source='s3://bucket', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.aws: 'aws'>, region='eu-west-1', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 4, 59, 49), user_id=34, pulled_on=None, tags=[Tag(name='new_tag', id=9, created=datetime.datetime(2022, 10, 18, 12, 30, 43), uuid=UUID('1662ea89-5018-4728-a71a-ecc502a22fd7'))]
DataBlob(id=185, uuid=UUID('d90014da-a38b-4b38-a26e-24849d5c0879'), type='s3', uri='s3://****************************************@bucket', source='s3://bucket', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.aws: 'aws'>, region='eu-west-1', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 4, 59, 49), user_id=34, pulled_on=None, tags=[Tag(name='new_tag', id=9, created=datetime.datetime(2022, 10, 18, 1

In [None]:
test = f"DataBlob(id=189, uuid=UUID('c03e2c7f-e974-4f0d-ae2e-6735f329a57b'), type='azure_blob_storage', uri='https://{generate_random_string(88)}@testairtservice.blob.core.windows.net/test-container/account_312571_events', source='https://testairtservice.blob.core.windows.net/test-container/account_312571_events', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.azure: 'azure'>, region='westeurope', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 5, 0, 4), user_id=34, pulled_on=None, tags=[Tag(name='latest', id=3, created=datetime.datetime(2022, 10, 18, 12, 28, 54), uuid=UUID('38763262-eceb-4501-8ce2-b6ca66bce7c1')), Tag(name='my_azure_blob_storage_datablob_tag', id=4, created=datetime.datetime(2022, 10, 18, 12, 29, 9), uuid=UUID('0aaaa594-c164-4555-bf16-d8ce8deb14a6'))])"
expected = "DataBlob(id=189, uuid=UUID('c03e2c7f-e974-4f0d-ae2e-6735f329a57b'), type='azure_blob_storage', uri='https://****************************************@testairtservice.blob.core.windows.net/test-container/account_312571_events', source='https://testairtservice.blob.core.windows.net/test-container/account_312571_events', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.azure: 'azure'>, region='westeurope', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 5, 0, 4), user_id=34, pulled_on=None, tags=[Tag(name='latest', id=3, created=datetime.datetime(2022, 10, 18, 12, 28, 54), uuid=UUID('38763262-eceb-4501-8ce2-b6ca66bce7c1')), Tag(name='my_azure_blob_storage_datablob_tag', id=4, created=datetime.datetime(2022, 10, 18, 12, 29, 9), uuid=UUID('0aaaa594-c164-4555-bf16-d8ce8deb14a6'))])"
# Compare the complete strings: slicing both sides with n = -1 left the final
# character out of the check.
actual = sanitize_secrets(test)
print(actual)
print(expected)
assert actual == expected

DataBlob(id=189, uuid=UUID('c03e2c7f-e974-4f0d-ae2e-6735f329a57b'), type='azure_blob_storage', uri='https://****************************************@testairtservice.blob.core.windows.net/test-container/account_312571_events', source='https://testairtservice.blob.core.windows.net/test-container/account_312571_events', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.azure: 'azure'>, region='westeurope', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 5, 0, 4), user_id=34, pulled_on=None, tags=[Tag(name='latest', id=3, created=datetime.datetime(2022, 10, 18, 12, 28, 54), uuid=UUID('38763262-eceb-4501-8ce2-b6ca66bce7c1')), Tag(name='my_azure_blob_storage_datablob_tag', id=4, created=datetime.datetime(2022, 10, 18, 12, 29, 9), uuid=UUID('0aaaa594-c164-4555-bf16-d8ce8deb14a6'))]
DataBlob(id=189, uuid=UUID('c03e2c7f-e974-4f0d-ae2e-6735f329a57b'), type='azure_blob_storage', uri='https://****************************************@te

In [None]:
test = "DataBlob(id=190, uuid=UUID('7017b354-8009-4132-af05-986b167cc3d3'), type='db', uri='mysql://username:password@db.example.com:3306/database_to_import/events', source='mysql://db.example.com:3306/database_to_import/events', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.aws: 'aws'>, region='eu-west-1', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 5, 0, 5), user_id=34, pulled_on=None, tags=[Tag(name='latest', id=3, created=datetime.datetime(2022, 10, 18, 12, 28, 54), uuid=UUID('38763262-eceb-4501-8ce2-b6ca66bce7c1')), Tag(name='my_db_datablob_tag', id=5, created=datetime.datetime(2022, 10, 18, 12, 29, 11), uuid=UUID('b6b7787e-bd74-4c19-bf0d-6c2245e56796'))])"
expected = "DataBlob(id=190, uuid=UUID('7017b354-8009-4132-af05-986b167cc3d3'), type='db', uri='mysql://****************************************@db.example.com:3306/database_to_import/events', source='mysql://db.example.com:3306/database_to_import/events', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.aws: 'aws'>, region='eu-west-1', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 5, 0, 5), user_id=34, pulled_on=None, tags=[Tag(name='latest', id=3, created=datetime.datetime(2022, 10, 18, 12, 28, 54), uuid=UUID('38763262-eceb-4501-8ce2-b6ca66bce7c1')), Tag(name='my_db_datablob_tag', id=5, created=datetime.datetime(2022, 10, 18, 12, 29, 11), uuid=UUID('b6b7787e-bd74-4c19-bf0d-6c2245e56796'))])"
# Compare the complete strings: slicing both sides with n = -1 left the final
# character out of the check.
actual = sanitize_secrets(test)
print(actual)
print(expected)
assert actual == expected

DataBlob(id=190, uuid=UUID('7017b354-8009-4132-af05-986b167cc3d3'), type='db', uri='mysql://****************************************@db.example.com:3306/database_to_import/events', source='mysql://db.example.com:3306/database_to_import/events', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.aws: 'aws'>, region='eu-west-1', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 5, 0, 5), user_id=34, pulled_on=None, tags=[Tag(name='latest', id=3, created=datetime.datetime(2022, 10, 18, 12, 28, 54), uuid=UUID('38763262-eceb-4501-8ce2-b6ca66bce7c1')), Tag(name='my_db_datablob_tag', id=5, created=datetime.datetime(2022, 10, 18, 12, 29, 11), uuid=UUID('b6b7787e-bd74-4c19-bf0d-6c2245e56796'))]
DataBlob(id=190, uuid=UUID('7017b354-8009-4132-af05-986b167cc3d3'), type='db', uri='mysql://****************************************@db.example.com:3306/database_to_import/events', source='mysql://db.example.com:3306/database_to_import/events', 

In [None]:
test = "DataBlob(id=191, uuid=UUID('7cb023b9-9c79-4193-a5de-15cd3b197301'), type='db', uri='clickhouse+native://username:password@db.example.com:3306/database_to_import/events', source='clickhouse+native://db.example.com:3306/database_to_import/events', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.aws: 'aws'>, region='eu-west-1', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 5, 0, 6), user_id=34, pulled_on=None, tags=[Tag(name='latest', id=3, created=datetime.datetime(2022, 10, 18, 12, 28, 54), uuid=UUID('38763262-eceb-4501-8ce2-b6ca66bce7c1')), Tag(name='my_clickhouse_datablob_tag', id=6, created=datetime.datetime(2022, 10, 18, 12, 29, 11), uuid=UUID('4011d87d-de84-4482-a1a8-cafdd623be91'))])"
expected = "DataBlob(id=191, uuid=UUID('7cb023b9-9c79-4193-a5de-15cd3b197301'), type='db', uri='clickhouse+native://****************************************@db.example.com:3306/database_to_import/events', source='clickhouse+native://db.example.com:3306/database_to_import/events', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.aws: 'aws'>, region='eu-west-1', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 5, 0, 6), user_id=34, pulled_on=None, tags=[Tag(name='latest', id=3, created=datetime.datetime(2022, 10, 18, 12, 28, 54), uuid=UUID('38763262-eceb-4501-8ce2-b6ca66bce7c1')), Tag(name='my_clickhouse_datablob_tag', id=6, created=datetime.datetime(2022, 10, 18, 12, 29, 11), uuid=UUID('4011d87d-de84-4482-a1a8-cafdd623be91'))])"
# Compare the complete strings: slicing both sides with n = -1 left the final
# character out of the check.
actual = sanitize_secrets(test)
print(actual)
print(expected)
assert actual == expected

DataBlob(id=191, uuid=UUID('7cb023b9-9c79-4193-a5de-15cd3b197301'), type='db', uri='clickhouse+native://****************************************@db.example.com:3306/database_to_import/events', source='clickhouse+native://db.example.com:3306/database_to_import/events', total_steps=1, completed_steps=0, folder_size=None, cloud_provider=<CloudProvider.aws: 'aws'>, region='eu-west-1', error=None, disabled=False, path=None, created=datetime.datetime(2022, 10, 19, 5, 0, 6), user_id=34, pulled_on=None, tags=[Tag(name='latest', id=3, created=datetime.datetime(2022, 10, 18, 12, 28, 54), uuid=UUID('38763262-eceb-4501-8ce2-b6ca66bce7c1')), Tag(name='my_clickhouse_datablob_tag', id=6, created=datetime.datetime(2022, 10, 18, 12, 29, 11), uuid=UUID('4011d87d-de84-4482-a1a8-cafdd623be91'))]
DataBlob(id=191, uuid=UUID('7cb023b9-9c79-4193-a5de-15cd3b197301'), type='db', uri='clickhouse+native://****************************************@db.example.com:3306/database_to_import/events', source='clickhouse+n

In [None]:
test = (
    """FromLocalResponse(uuid=UUID('b19157e3-4bb9-4755-8cfa-208f87d32c04'), type='local', presigned={'url': 'https://kumaran-airt-service-eu-west-1.s3.amazonaws.com/', 'fields': {'key': '34/datablob/192/${filename}', 'AWSAccessKeyId': '"""
    + generate_random_string(20)
    + """', 'policy': '"""
    + generate_random_string(252)
    + """', 'signature': '"""
    + generate_random_string(28)
    + """'}})"""
)
expected = "FromLocalResponse(uuid=UUID('b19157e3-4bb9-4755-8cfa-208f87d32c04'), type='local', presigned={'url': 'https://kumaran-airt-service-eu-west-1.s3.amazonaws.com/', 'fields': {'key': '****************************************', 'AWSAccessKeyId': '********************', 'policy': '************************************************************************************************************************************************************************************************************************************************************', 'signature': '****************************'}})"
# Compare the complete strings: slicing both sides with n = -1 left the final
# character out of the check.
actual = sanitize_secrets(test)
print(actual)
print(expected)
assert actual == expected

FromLocalResponse(uuid=UUID('b19157e3-4bb9-4755-8cfa-208f87d32c04'), type='local', presigned={'url': 'https://kumaran-airt-service-eu-west-1.s3.amazonaws.com/', 'fields': {'key': '****************************************', 'AWSAccessKeyId': '********************', 'policy': '************************************************************************************************************************************************************************************************************************************************************', 'signature': '****************************'}}
FromLocalResponse(uuid=UUID('b19157e3-4bb9-4755-8cfa-208f87d32c04'), type='local', presigned={'url': 'https://kumaran-airt-service-eu-west-1.s3.amazonaws.com/', 'fields': {'key': '****************************************', 'AWSAccessKeyId': '********************', 'policy': '***************************************************************************************************************************************************

In [None]:
# | export
# Keep the first reference ever captured. Re-running this cell (or reloading the
# module) after Logger._log has been patched would otherwise store the patched
# function here, and the patch would then call itself until RecursionError.
old_log = globals().get("old_log", Logger._log)

In [None]:
# | export
@patch
def _log(self: Logger, level, msg, *args, **kwargs):
    """Sanitize ``msg`` and forward the call to the original ``Logger._log``.

    NOTE(review): ``patch`` comes from ``airt.patching``; presumably it installs
    this function as ``Logger._log`` based on the ``self: Logger`` annotation.
    Confirm against that module.

    Only ``msg`` is sanitized. The remaining positional and keyword arguments
    (including any %-format arguments) are forwarded unchanged, so secrets
    passed through them are not masked here.
    """
    # logging renders a non-string message with str(); convert it the same way
    # so that e.g. logging an exception keeps its usual text instead of its repr.
    text = msg if isinstance(msg, str) else str(msg)
    return old_log(self, level, sanitize_secrets(text), *args, **kwargs)

In [None]:
# Log the sample line through the patched Logger; the output below shows the
# secret values masked.
logger = get_logger(__name__)
logger.log(logging.INFO, test_string)

22-10-19 13:28:49.216 [INFO] __main__: [INFO] airt_service.batch_job_components.fastapi: FastAPIBatchJobContext.create_job(self=FastAPIBatchJobContext(task=csv_processing), command='process_csv 80 30 PersonId '["OccurredTime"]' --blocksize 256MB --kwargs_json '{"usecols": [0, 1, 2, 3, 4], "parse_dates": ["OccurredTime"]}' --deduplicate_data', environment_vars={'AWS_ACCESS_KEY_ID': '********************', 'AWS_SECRET_ACCESS_KEY': '****************************************', 'AWS_DEFAULT_REGION': 'eu-west-1', 'AZURE_SUBSCRIPTION_ID': '************************************', 'AZURE_TENANT_ID': '************************************', 'AZURE_CLIENT_ID': '************************************', 'AZURE_CLIENT_SECRET': '****************************************', 'AZURE_STORAGE_ACCOUNT_PREFIX': 'kumsairtsdev', 'AZURE_RESOURCE_GROUP': 'kumaran-airt-service-dev', 'AIRT_SERVICE_SUPER_USER_PASSWORD': '****************************************', 'AIRT_TOKEN_SECRET_KEY': '********************************

In [None]:
# Same check through the info() convenience method; the output below is masked as well.
logger.info(test_string)

22-10-19 13:28:49.220 [INFO] __main__: [INFO] airt_service.batch_job_components.fastapi: FastAPIBatchJobContext.create_job(self=FastAPIBatchJobContext(task=csv_processing), command='process_csv 80 30 PersonId '["OccurredTime"]' --blocksize 256MB --kwargs_json '{"usecols": [0, 1, 2, 3, 4], "parse_dates": ["OccurredTime"]}' --deduplicate_data', environment_vars={'AWS_ACCESS_KEY_ID': '********************', 'AWS_SECRET_ACCESS_KEY': '****************************************', 'AWS_DEFAULT_REGION': 'eu-west-1', 'AZURE_SUBSCRIPTION_ID': '************************************', 'AZURE_TENANT_ID': '************************************', 'AZURE_CLIENT_ID': '************************************', 'AZURE_CLIENT_SECRET': '****************************************', 'AZURE_STORAGE_ACCOUNT_PREFIX': 'kumsairtsdev', 'AZURE_RESOURCE_GROUP': 'kumaran-airt-service-dev', 'AIRT_SERVICE_SUPER_USER_PASSWORD': '****************************************', 'AIRT_TOKEN_SECRET_KEY': '********************************

In [None]:
# display(test_string)

In [None]:
# | export
# Keep the first reference ever captured. Re-running this cell after the
# replacement below has been installed would otherwise store the replacement
# itself here, and it would then call itself until RecursionError.
old_publish_display_data = globals().get(
    "old_publish_display_data", IPython.core.display_functions.publish_display_data
)

In [None]:
# | export


def new_publish_display_data(
    data,
    metadata=None,
    source=IPython.core.display_functions._sentinel,
    *,
    transient=None,
    **kwargs,
):
    """Sanitize every value in ``data`` and delegate to the original function.

    String values are sanitized as they are. Any other value is replaced by
    the sanitized text of its ``__repr__()``, so the forwarded dict contains
    strings only. All other arguments are passed through untouched.
    """
    sanitized_data = {}
    for name, payload in data.items():
        text = payload if isinstance(payload, str) else payload.__repr__()
        sanitized_data[name] = sanitize_secrets(text)

    return old_publish_display_data(
        sanitized_data, metadata=metadata, source=source, transient=transient, **kwargs
    )


# Install the wrapper on the IPython module so that lookups of
# publish_display_data there resolve to the sanitizing version.
IPython.core.display_functions.publish_display_data = new_publish_display_data

In [None]:
# With the replacement installed, display() output is sanitized; see the masked
# values in the output below.
display(test_string)

'[INFO] airt_service.batch_job_components.fastapi: FastAPIBatchJobContext.create_job(self=FastAPIBatchJobContext(task=csv_processing), command=\'process_csv 80 30 PersonId \'["OccurredTime"]\' --blocksize 256MB --kwargs_json \'{"usecols": [0, 1, 2, 3, 4], "parse_dates": ["OccurredTime"]}\' --deduplicate_data\', environment_vars={\'AWS_ACCESS_KEY_ID': '********************', \'AWS_SECRET_ACCESS_KEY': '****************************************', \'AWS_DEFAULT_REGION\': \'eu-west-1\', \'AZURE_SUBSCRIPTION_ID': '************************************', \'AZURE_TENANT_ID': '************************************', \'AZURE_CLIENT_ID': '************************************', \'AZURE_CLIENT_SECRET': '****************************************', \'AZURE_STORAGE_ACCOUNT_PREFIX\': \'kumsairtsdev\', \'AZURE_RESOURCE_GROUP\': \'kumaran-airt-service-dev\', \'AIRT_SERVICE_SUPER_USER_PASSWORD': '****************************************', \'AIRT_TOKEN_SECRET_KEY': '****************************************', \

In [None]:
# | exporti

# Keep the first reference ever captured, so that re-running this cell after
# builtins.print has been replaced by sanitized_print cannot make
# sanitized_print call itself.
old_print = globals().get("old_print", builtins.print)

In [None]:
# | export


def sanitized_print(*objects, sep=" ", end="\n", file=None, flush=False):
    """Print ``objects`` after masking secrets in each of them.

    Args:
        *objects: Values to print. Strings are sanitized as they are; any
            other object is sanitized via its ``repr()``.
        sep: Separator written between the objects.
        end: Text written after the last object.
        file: Target stream. ``None`` (the default) means the current
            ``sys.stdout`` at call time, as with the built-in ``print``.
        flush: Whether to flush the stream after writing.

    Returns:
        Whatever the original ``print`` returns.
    """
    # A default of ``file=sys.stdout`` would be bound once at definition time
    # and ignore any later redirection of sys.stdout; ``None`` defers the lookup.
    new_objs = [
        sanitize_secrets(obj if isinstance(obj, str) else repr(obj))
        for obj in objects
    ]
    return old_print(*new_objs, sep=sep, end=end, file=file, flush=flush)


# To sanitize every print() call globally, uncomment:
# builtins.print = sanitized_print

In [None]:
# Print the sample line through sanitized_print; the output below shows the secrets masked.
sanitized_print(test_string)

[INFO] airt_service.batch_job_components.fastapi: FastAPIBatchJobContext.create_job(self=FastAPIBatchJobContext(task=csv_processing), command='process_csv 80 30 PersonId '["OccurredTime"]' --blocksize 256MB --kwargs_json '{"usecols": [0, 1, 2, 3, 4], "parse_dates": ["OccurredTime"]}' --deduplicate_data', environment_vars={'AWS_ACCESS_KEY_ID': '********************', 'AWS_SECRET_ACCESS_KEY': '****************************************', 'AWS_DEFAULT_REGION': 'eu-west-1', 'AZURE_SUBSCRIPTION_ID': '************************************', 'AZURE_TENANT_ID': '************************************', 'AZURE_CLIENT_ID': '************************************', 'AZURE_CLIENT_SECRET': '****************************************', 'AZURE_STORAGE_ACCOUNT_PREFIX': 'kumsairtsdev', 'AZURE_RESOURCE_GROUP': 'kumaran-airt-service-dev', 'AIRT_SERVICE_SUPER_USER_PASSWORD': '****************************************', 'AIRT_TOKEN_SECRET_KEY': '****************************************', 'STORAGE_BUCKET_PREFIX': 'ku