In [1]:
import sys
sys.path.append("../../../../../")
import tempfile
from pathlib import Path


# Sample event fed to the processor in a later cell. Both keys are the ones
# referenced by the rule's `filter` expression below.
document = {
    "regex_key_one": "*1value2*",
    "regex_key_two": "Here is 1234 in the message",
}


# Pseudonymizer rule under test. The filter matches both sample fields with
# slash-delimited regex patterns and additionally lists them in `regex_fields`.
# NOTE(review): the mapping targets winlog.event_data.param1/param2, which are
# not present in `document` above.
rule_yaml = """
---
filter: 'regex_key_one: "/.*value.*/" AND regex_key_two: "/.*1234.*/"'
regex_fields:
  - "regex_key_one"
  - "regex_key_two"
pseudonymizer:
  mapping:
      winlog.event_data.param1: "RE_WHOLE_FIELD"
      winlog.event_data.param2: "RE_WHOLE_FIELD"
  description: "..."
"""

# The whole directory is handed to the processor as `specific_rules`, so it
# must contain nothing but this example's rule file. Use a directory name that
# is unique to this example instead of sharing one with other processors.
rule_path = Path(tempfile.gettempdir()) / "pseudonymizer_regex_fields_example"
rule_path.mkdir(parents=True, exist_ok=True)
rule_file = rule_path / "data-stream.yml"
# Explicit encoding keeps the written rule independent of the platform default.
rule_file.write_text(rule_yaml, encoding="utf-8")

# Single-entry configuration: the key is the component name, the value its
# settings.
# NOTE(review): the name says "dropper" although the type is "pseudonymizer";
# kept unchanged because other cells may refer to it.
processor_config = {
    "mydropper": {
        "type": "pseudonymizer",
        "specific_rules": [str(rule_path)],
        # presumably a directory without rule files, so no generic rules are
        # loaded - TODO confirm
        "generic_rules": ["/dev"],
        "outputs": [{"kafka": "topic"}],
        # Paths below are relative to the notebook's working directory.
        "pubkey_analyst": "../../../../../tests/testdata/unit/pseudonymizer/example_analyst_pub.pem",
        "pubkey_depseudo": "../../../../../tests/testdata/unit/pseudonymizer/example_depseudo_pub.pem",
        "hash_salt": "a_secret_tasty_ingredient",
        "regex_mapping":  "../../../../../tests/testdata/unit/pseudonymizer/rules/regex_mapping.yml",
        "max_cached_pseudonyms": 1000000
    }
}

In [2]:
from unittest import mock
from logprep.factory import Factory

# NOTE(review): mock_logger is created here but not passed to anything in this cell.
mock_logger = mock.MagicMock()
# Build the processor from the single-entry `processor_config` dict.
pseudonymizer_processor = Factory.create(processor_config)
# Bare last expression so the notebook displays the created object.
pseudonymizer_processor

pseudonymizer

In [3]:
from copy import deepcopy
# Work on a deep copy so `document` stays untouched and this cell can be re-run.
mydocument = deepcopy(document)

# Print the event before and after processing; the return value of process()
# is not used, so any change is expected to show up in `mydocument` itself.
print(f"before: {mydocument}")
pseudonymizer_processor.process(mydocument)
# NOTE(review): the recorded output shows the event unchanged - presumably
# because the rule's mapping names fields that `document` lacks; confirm.
print(f"after: {mydocument}")

before: {'regex_key_one': '*1value2*', 'regex_key_two': 'Here is 1234 in the message'}
after: {'regex_key_one': '*1value2*', 'regex_key_two': 'Here is 1234 in the message'}
