From df638c45b8f443e5246b7951fdb37bcdcd43ffc4 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Sun, 26 Apr 2026 23:00:35 +0200 Subject: [PATCH 1/2] Redact rendered template fields while still structured to preserve nested-key masking on truncation Generated-by: Claude Opus 4.7 (1M context) following the guidelines at https://github.com/apache/airflow/blob/main/contributing-docs/05_pull_requests.rst#gen-ai-assisted-contributions --- .../src/airflow/serialization/helpers.py | 5 +- .../tests/unit/serialization/test_helpers.py | 24 ++++++++ .../airflow/sdk/execution_time/task_runner.py | 5 +- .../execution_time/test_task_runner.py | 58 +++++++++++++++++++ 4 files changed, 90 insertions(+), 2 deletions(-) diff --git a/airflow-core/src/airflow/serialization/helpers.py b/airflow-core/src/airflow/serialization/helpers.py index 83b57d1c7ccaa..9984815218b31 100644 --- a/airflow-core/src/airflow/serialization/helpers.py +++ b/airflow-core/src/airflow/serialization/helpers.py @@ -105,7 +105,10 @@ def serialize_object(obj): serialized = serialize_object(template_field) if len(str(serialized)) > max_length: - rendered = redact(str(serialized), name) + # Redact while still structured to preserve nested-key context (so values under + # documented sensitive keys such as `password`, `token`, `secret`, `api_key` + # are masked recursively); only stringify the redacted result for truncation. + rendered = redact(serialized, name) return truncate_rendered_value(str(rendered), max_length) return serialized diff --git a/airflow-core/tests/unit/serialization/test_helpers.py b/airflow-core/tests/unit/serialization/test_helpers.py index 0dbd70fd747f7..1e453a5e3d8dc 100644 --- a/airflow-core/tests/unit/serialization/test_helpers.py +++ b/airflow-core/tests/unit/serialization/test_helpers.py @@ -657,3 +657,27 @@ def make_value(): assert all(isinstance(k, str) for k in inner[float_key]) assert "at 0x" not in str(r1) json.dumps(r1, sort_keys=True) + + +@pytest.mark.enable_redact +def test_serialize_template_field_masks_nested_sensitive_keys_on_truncation(monkeypatch): + """Nested sensitive-key masking applies consistently across the truncation path. + + A value under a documented sensitive key (``password``, ``token``, ``secret``, + ``api_key``) is masked recursively by ``redact()`` when the structured value + is walked. The oversized branch must redact while still structured so that + nested-key context is preserved before stringification — otherwise the post- + stringify ``redact()`` call only sees the outer field name and the recursive + walker cannot reach the inner key. + """ + monkeypatch.setenv("AIRFLOW__CORE__MAX_TEMPLATED_FIELD_LENGTH", "200") + + nested_value = "REGRESSION-FIXTURE-NESTED-PASSWORD-VALUE" + payload = {"nested": {"password": nested_value, "zz_pad": "A" * 500}} + + result = serialize_template_field(payload, "templates_dict") + + assert isinstance(result, str) + assert "Truncated. You can change this behaviour" in result + assert nested_value not in result + assert "***" in result diff --git a/task-sdk/src/airflow/sdk/execution_time/task_runner.py b/task-sdk/src/airflow/sdk/execution_time/task_runner.py index 32cdae7127707..9cb766c9b2d13 100644 --- a/task-sdk/src/airflow/sdk/execution_time/task_runner.py +++ b/task-sdk/src/airflow/sdk/execution_time/task_runner.py @@ -1084,7 +1084,10 @@ def serialize_object(obj): serialized = serialize_object(template_field) if len(str(serialized)) > max_length: - rendered = redact(str(serialized), name) + # Redact while still structured to preserve nested-key context (so values under + # documented sensitive keys such as `password`, `token`, `secret`, `api_key` + # are masked recursively); only stringify the redacted result for truncation. + rendered = redact(serialized, name) return truncate_rendered_value(str(rendered), max_length) return serialized diff --git a/task-sdk/tests/task_sdk/execution_time/test_task_runner.py b/task-sdk/tests/task_sdk/execution_time/test_task_runner.py index b851d73f74428..0f46b9942cdcd 100644 --- a/task-sdk/tests/task_sdk/execution_time/test_task_runner.py +++ b/task-sdk/tests/task_sdk/execution_time/test_task_runner.py @@ -3002,6 +3002,64 @@ def execute(self, context): assert env_vars_value.endswith("...") assert "***" in env_vars_value # secrets are redacted before truncation + @pytest.mark.enable_redact + def test_rendered_templates_mask_nested_keys_with_truncation( + self, create_runtime_ti, mock_supervisor_comms + ): + """Nested sensitive-key masking applies consistently across the truncation path. + + A value under a documented sensitive key (``password``, ``token``, ``secret``, + ``api_key``) is masked recursively by ``redact()`` when the structured value + is walked. The oversized branch must redact while still structured so that + nested-key context is preserved before stringification — otherwise the post- + stringify ``redact()`` call only sees the outer field name and the recursive + walker cannot reach the inner key. + """ + from airflow.sdk._shared.secrets_masker import _secrets_masker + + # The SDK masker starts with an empty sensitive-fields list in the test runtime + # (settings.py has not run); register `password` explicitly so the structured + # walker has something to match. Production workers get this from settings.py. + masker = _secrets_masker() + if "password" not in masker.sensitive_variables_fields: + masker.sensitive_variables_fields = list(masker.sensitive_variables_fields) + ["password"] + + nested_value = "REGRESSION-FIXTURE-NESTED-PASSWORD-VALUE" + + class CustomOperator(BaseOperator): + template_fields = ("env_vars",) + + def __init__(self, env_vars, *args, **kwargs): + super().__init__(*args, **kwargs) + self.env_vars = env_vars + + def execute(self, context): + pass + + # Nested 'password' key under enough padding to exceed default 4096-char limit. + env_vars = { + "DB": {"password": nested_value, "host": "db.internal", "zz_pad": "A" * 5000}, + } + + task = CustomOperator(task_id="test_nested_truncation_masking", env_vars=env_vars) + + runtime_ti = create_runtime_ti(task=task, dag_id="test_nested_truncation_masking_dag") + run(runtime_ti, context=runtime_ti.get_template_context(), log=mock.MagicMock()) + + msg = next( + c.kwargs["msg"] + for c in mock_supervisor_comms.send.mock_calls + if c.kwargs.get("msg") and getattr(c.kwargs["msg"], "type", None) == "SetRenderedFields" + ) + env_vars_value = msg.rendered_fields["env_vars"] + + assert isinstance(env_vars_value, str) + assert env_vars_value.startswith( + "Truncated. You can change this behaviour in [core]max_templated_field_length. " + ) + assert nested_value not in env_vars_value + assert "'password': '***'" in env_vars_value + @pytest.mark.enable_redact def test_rendered_templates_masks_secrets_in_complex_objects( self, create_runtime_ti, mock_supervisor_comms From e8380b24ced07e621c4ce827f6716fcd05fbeaa0 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Mon, 27 Apr 2026 00:20:47 +0200 Subject: [PATCH 2/2] Isolate masker patterns in nested-key truncation test The new test_rendered_templates_mask_nested_keys_with_truncation shares the singleton SecretsMasker with earlier tests in the file. One of those (test_get_connection_from_context) fetches a connection whose password fixture value happens to be the literal string "password", which the SDK runtime registers as a regex mask via mask_secret(). When the new test runs after it, that regex substitutes the literal token "password" inside str(redacted) -- including the dict KEY name -- so the assertion "'password': '***'" fails because the key itself is also masked. Reset patterns/replacer for the test via monkeypatch (auto-restored on teardown) so the assertion isolates value-masking (the behavior under test) from key-token replacement (a side effect of leaked patterns). --- .../execution_time/test_task_runner.py | 21 +++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/task-sdk/tests/task_sdk/execution_time/test_task_runner.py b/task-sdk/tests/task_sdk/execution_time/test_task_runner.py index 0f46b9942cdcd..5234b30b84cbf 100644 --- a/task-sdk/tests/task_sdk/execution_time/test_task_runner.py +++ b/task-sdk/tests/task_sdk/execution_time/test_task_runner.py @@ -3004,7 +3004,7 @@ def execute(self, context): @pytest.mark.enable_redact def test_rendered_templates_mask_nested_keys_with_truncation( - self, create_runtime_ti, mock_supervisor_comms + self, create_runtime_ti, mock_supervisor_comms, monkeypatch ): """Nested sensitive-key masking applies consistently across the truncation path. @@ -3017,12 +3017,25 @@ def test_rendered_templates_mask_nested_keys_with_truncation( """ from airflow.sdk._shared.secrets_masker import _secrets_masker + # Earlier tests in this file (e.g. test_get_connection_from_context) call + # mask_secret(conn.password) where the fixture's password value is the literal + # "password"; that registers "password" as a regex pattern in the singleton + # masker. Without isolation, str(redacted) gets that regex applied and the + # dict KEY name "password" itself becomes "***", obscuring whether the + # structured nested-key walk fired. Reset the regex patterns for this test + # (monkeypatch restores them on teardown) so the assertion can distinguish + # value-masking (what we are testing) from key-token replacement. + masker = _secrets_masker() + monkeypatch.setattr(masker, "patterns", set()) + monkeypatch.setattr(masker, "replacer", None) # The SDK masker starts with an empty sensitive-fields list in the test runtime # (settings.py has not run); register `password` explicitly so the structured # walker has something to match. Production workers get this from settings.py. - masker = _secrets_masker() - if "password" not in masker.sensitive_variables_fields: - masker.sensitive_variables_fields = list(masker.sensitive_variables_fields) + ["password"] + monkeypatch.setattr( + masker, + "sensitive_variables_fields", + list(masker.sensitive_variables_fields) + ["password"], + ) nested_value = "REGRESSION-FIXTURE-NESTED-PASSWORD-VALUE"