Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 10 additions & 13 deletions airflow-core/src/airflow/api_fastapi/logging/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,10 @@ def _mask_connection_fields(extra_fields):
try:
parsed_extra = json.loads(v)
if isinstance(parsed_extra, dict):
masked_extra = {ek: secrets_masker.redact(ev, ek) for ek, ev in parsed_extra.items()}
result[k] = masked_extra
# Connection ``extra`` can carry values under arbitrary key names, so the
# audit-log entry records only *which* ``extra`` fields were present and masks
# every value rather than deciding what to mask from the key name.
result[k] = {ek: "***" for ek in parsed_extra}
else:
result[k] = "Expected JSON object in `extra` field, got non-dict JSON"
except json.JSONDecodeError:
Expand All @@ -66,23 +68,18 @@ def _mask_connection_fields(extra_fields):

def _mask_variable_fields(extra_fields):
"""
Mask the 'val_content' field if 'key_content' is in the mask list.
Mask the variable value.

The variable requests values and args comes in this form:
{'key': 'key_content', 'val': 'val_content', 'description': 'description_content'}

The value is masked unconditionally — the audit log records that a variable
changed, not its contents, so a secret stored under any key name (not just a
sensitive-looking one) is never persisted to the log.
"""
result = {}
keyname = None
for k, v in extra_fields.items():
if k == "key":
keyname = v
result[k] = v
elif keyname and (k == "val" or k == "value"):
x = secrets_masker.redact(v, keyname)
result[k] = x
keyname = None
else:
result[k] = v
result[k] = "***" if k in ("val", "value") else v
return result


Expand Down
81 changes: 80 additions & 1 deletion airflow-core/tests/unit/api_fastapi/logging/test_decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,15 @@
# under the License.
from __future__ import annotations

import json

import pytest

from airflow.api_fastapi.logging.decorators import _sanitize_for_stdlib_log
from airflow.api_fastapi.logging.decorators import (
_mask_connection_fields,
_mask_variable_fields,
_sanitize_for_stdlib_log,
)


class TestSanitizeForStdlibLog:
Expand All @@ -43,3 +49,76 @@ class TestSanitizeForStdlibLog:
)
def test_strips_cr_and_lf(self, raw, expected):
assert _sanitize_for_stdlib_log(raw) == expected


class TestMaskConnectionFields:
"""Connection ``extra`` values must never be recorded verbatim in the audit log.

Secrets routinely live in connection ``extra`` under key names that are not in the
masker's sensitive-key list (``sas_url``, ``endpoint``, ``jdbc_url``, …). Masking by
key name alone therefore lets those values through. ``_mask_connection_fields`` fails
closed instead: every ``extra`` value is masked and only the key names are kept.
"""

def test_masks_extra_value_under_non_sensitive_key(self):
result = _mask_connection_fields(
{"extra": json.dumps({"sas_url": "SAS_LEAK_value", "account_name": "prodacct"})}
)
assert result["extra"] == {"sas_url": "***", "account_name": "***"}

def test_masks_extra_value_under_sensitive_key(self):
result = _mask_connection_fields({"extra": json.dumps({"password": "hunter2"})})
assert result["extra"] == {"password": "***"}

def test_preserves_extra_key_names(self):
result = _mask_connection_fields({"extra": json.dumps({"host": "h", "login": "l", "endpoint": "e"})})
assert set(result["extra"]) == {"host", "login", "endpoint"}
assert all(v == "***" for v in result["extra"].values())

@pytest.mark.enable_redact
def test_top_level_password_is_masked_by_key_name(self):
# Top-level connection fields keep the existing key-name redaction path
# (unchanged by this fix); it only masks when the secrets masker is active.
result = _mask_connection_fields({"connection_id": "c1", "password": "hunter2"})
assert result["connection_id"] == "c1"
assert result["password"] == "***"

def test_non_dict_json_extra_returns_informational_string(self):
result = _mask_connection_fields({"extra": json.dumps(["not", "a", "dict"])})
assert result["extra"] == "Expected JSON object in `extra` field, got non-dict JSON"

def test_non_json_extra_returns_informational_string(self):
result = _mask_connection_fields({"extra": "this is not json"})
assert result["extra"] == "Encountered non-JSON in `extra` field"

def test_empty_extra_passes_through_key_name_redaction(self):
# falsy ``extra`` skips the JSON branch and goes through key-name redaction
result = _mask_connection_fields({"extra": ""})
assert result["extra"] == ""


class TestMaskVariableFields:
"""The variable value is masked unconditionally in the audit log.

A secret stored as a Variable under an innocuous key name (e.g.
``campaign_signing_material``) must not be persisted to the audit log, so the value is
masked regardless of the key name; the key and description are kept.
"""

def test_masks_value_under_non_sensitive_key(self):
result = _mask_variable_fields(
{"key": "campaign_signing_material", "value": "VARVAL_LEAK_token", "description": "d"}
)
assert result == {"key": "campaign_signing_material", "value": "***", "description": "d"}

def test_masks_value_under_sensitive_key(self):
result = _mask_variable_fields({"key": "api_secret", "value": "topsecret"})
assert result == {"key": "api_secret", "value": "***"}

def test_masks_val_alias(self):
result = _mask_variable_fields({"key": "k", "val": "secretval"})
assert result == {"key": "k", "val": "***"}

def test_value_without_key_is_still_masked(self):
result = _mask_variable_fields({"value": "secretval"})
assert result == {"value": "***"}
Loading