From 7e5968aaa1f23a6f85ed71ed336b4a53dc90e7d9 Mon Sep 17 00:00:00 2001 From: Jarek Potiuk Date: Wed, 16 Jun 2021 11:29:45 +0200 Subject: [PATCH] Switch to built-in data structures in SecretsMasker (#16424) Using Iterable in SecretsMasker might cause undesireable side effect in case the object passed as log parameter is an iterable object and actually iterating it is not idempotent. For example in case of botocore, it passes StreamingBody object to log and this object is Iterable. However it can be iterated only once. Masking causes the object to be iterated during logging and results in empty body when actual results are retrieved later. This change only iterates list type of objects and recurrently redacts only dicts/strs/tuples/sets/lists which should never produce any side effects as all those objects do not have side effects when they are accessed. Fixes: #16148 (cherry picked from commit d1d02b62e3436dedfe9a2b80cd1e61954639ca4d) (cherry picked from commit 4c37aeab97fb1f40a20b912402d2747cd5fc1d5a) --- airflow/utils/log/secrets_masker.py | 8 +++----- tests/utils/log/test_secrets_masker.py | 16 ---------------- 2 files changed, 3 insertions(+), 21 deletions(-) diff --git a/airflow/utils/log/secrets_masker.py b/airflow/utils/log/secrets_masker.py index c279b935efd37..6c1b97f5114b6 100644 --- a/airflow/utils/log/secrets_masker.py +++ b/airflow/utils/log/secrets_masker.py @@ -16,7 +16,6 @@ # under the License. """Mask sensitive information from logs""" import collections -import io import logging import re from typing import TYPE_CHECKING, Iterable, Optional, Set, TypeVar, Union @@ -178,7 +177,7 @@ def _redact_all(self, item: "RedactableItem") -> "RedactableItem": elif isinstance(item, (tuple, set)): # Turn set in to tuple! return tuple(self._redact_all(subval) for subval in item) - elif isinstance(item, Iterable): + elif isinstance(item, list): return list(self._redact_all(subval) for subval in item) else: return item @@ -209,12 +208,11 @@ def redact(self, item: "RedactableItem", name: str = None) -> "RedactableItem": elif isinstance(item, (tuple, set)): # Turn set in to tuple! return tuple(self.redact(subval) for subval in item) - elif isinstance(item, io.IOBase): - return item - elif isinstance(item, Iterable): + elif isinstance(item, list): return list(self.redact(subval) for subval in item) else: return item + # I think this should never happen, but it does not hurt to leave it just in case except Exception as e: # pylint: disable=broad-except log.warning( "Unable to redact %r, please report this via . " diff --git a/tests/utils/log/test_secrets_masker.py b/tests/utils/log/test_secrets_masker.py index aa6b8ebaa460d..78a10b5c73cc4 100644 --- a/tests/utils/log/test_secrets_masker.py +++ b/tests/utils/log/test_secrets_masker.py @@ -72,22 +72,6 @@ def test_args(self, logger, caplog): assert caplog.text == "INFO Cannot connect to user:***\n" - def test_non_redactable(self, logger, caplog): - class NonReactable: - def __iter__(self): - raise RuntimeError("force fail") - - def __repr__(self): - return "" - - logger.info("Logging %s", NonReactable()) - - assert caplog.messages == [ - "Unable to redact , please report this via " - + ". Error was: RuntimeError: force fail", - "Logging ", - ] - def test_extra(self, logger, caplog): logger.handlers[0].formatter = ShortExcFormatter("%(levelname)s %(message)s %(conn)s") logger.info("Cannot connect", extra={'conn': "user:password"})