diff --git a/CHANGELOG.md b/CHANGELOG.md index ff6854e3fc2..02e02113324 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -30,6 +30,7 @@ Changes can also be flagged with a GitHub label for tracking purposes. The URL o ### Fixed - Fixed GTM integration to properly handle duplicate notice keys [#6090](https://github.com/ethyca/fides/pull/6090) - Fix Special-purpose only vendors not correctly encoded in TC string [#6086](https://github.com/ethyca/fides/pull/6086) +- Suppressing SQLAlchemy logging related to caching queries [#6089](https://github.com/ethyca/fides/pull/6089) ## [2.60.0](https://github.com/ethyca/fides/compare/2.59.2...2.60.0) diff --git a/src/fides/api/util/logger.py b/src/fides/api/util/logger.py index 515ef1ae0c3..dc9495a0bd1 100644 --- a/src/fides/api/util/logger.py +++ b/src/fides/api/util/logger.py @@ -15,6 +15,7 @@ from loguru._handler import Message from fides.api.schemas.privacy_request import LogEntry, PrivacyRequestSource +from fides.api.util.sqlalchemy_filter import SQLAlchemyGeneratedFilter from fides.config import CONFIG, FidesConfig if TYPE_CHECKING: @@ -160,6 +161,11 @@ def setup(config: FidesConfig) -> None: logging.getLogger(name).handlers = [] logging.getLogger(name).propagate = True + # Apply filter to remove generated timing logs + sqlalchemy_generated_filter = SQLAlchemyGeneratedFilter() + sqlalchemy_engine_logger = logging.getLogger("sqlalchemy.engine.Engine") + sqlalchemy_engine_logger.addFilter(sqlalchemy_generated_filter) + handlers = [] # Configure main sink from config diff --git a/src/fides/api/util/sqlalchemy_filter.py b/src/fides/api/util/sqlalchemy_filter.py new file mode 100644 index 00000000000..e020e949cc3 --- /dev/null +++ b/src/fides/api/util/sqlalchemy_filter.py @@ -0,0 +1,30 @@ +import logging + + +class SQLAlchemyGeneratedFilter(logging.Filter): + """ + Logging filter that removes SQLAlchemy engine logs containing timing information. + + This filter specifically targets logs containing error messages from sqlalchemy.engine.Engine + which typically come from sqlalchemy.engine.Engine when executing cached queries. + """ + + def __init__(self) -> None: + super().__init__() + # List of substrings to filter out + self.patterns = [ + "cached since", + "caching disabled", + "does not support caching", + ] + + def filter(self, record: logging.LogRecord) -> bool: + # Only filter records from sqlalchemy.engine.Engine + if record.name == "sqlalchemy.engine.Engine": + message = record.getMessage() + # Check if any of the patterns exist in the message + for pattern in self.patterns: + if pattern in message: + return False # Filter out the log entry + + return True # Keep all other log entries diff --git a/tests/ops/util/test_logger.py b/tests/ops/util/test_logger.py index 09b11b54997..a152d31fda7 100644 --- a/tests/ops/util/test_logger.py +++ b/tests/ops/util/test_logger.py @@ -1,3 +1,4 @@ +import logging import os from datetime import datetime @@ -15,6 +16,7 @@ _log_warning, suppress_logging, ) +from fides.api.util.sqlalchemy_filter import SQLAlchemyGeneratedFilter from fides.config import CONFIG @@ -165,3 +167,57 @@ def test_suppress_logging_exception_safety(self, loguru_caplog): assert logger._core.min_level == original_min_level assert "This message should be suppressed" not in loguru_caplog.text assert "This message should appear in logs" in loguru_caplog.text + + +@pytest.mark.unit +class TestSQLAlchemyLogger: + def test_sqlalchemy_logging_filter(self, loguru_caplog): + """Test that the SQLAlchemyGeneratedFilter correctly filters out unwanted log messages.""" + + # Create a logger for SQLAlchemy + sqlalchemy_logger = logging.getLogger("sqlalchemy.engine.Engine") + sqlalchemy_logger.setLevel(logging.INFO) + + # Add the SQLAlchemyGeneratedFilter + filter = SQLAlchemyGeneratedFilter() + sqlalchemy_logger.addFilter(filter) + + # Log messages that should be filtered out + sqlalchemy_logger.info( + "This message was cached since yesterday and should be filtered" + ) + sqlalchemy_logger.info( + "This message indicates caching disabled and should be filtered" + ) + sqlalchemy_logger.info( + "[dialect redshift+psycopg2 does not support caching 0.00016s] {'email': ('atestingemail@email.com',)" + ) + + # Log a message that should not be filtered out + sqlalchemy_logger.info("This message should appear in logs") + + # Check that only the message that should not be filtered appears in the logs + log_messages = loguru_caplog.text + assert "This message should appear in logs" in log_messages + assert "This message contains no key and should be filtered" not in log_messages + assert ( + "This message was cached since yesterday and should be filtered" + not in log_messages + ) + assert ( + "This message was generated in 0.001s and should be filtered" + not in log_messages + ) + assert ( + "This message indicates caching disabled and should be filtered" + not in log_messages + ) + assert ( + "This message does not support caching and should be filtered" + not in log_messages + ) + assert "This message is unknown and should be filtered" not in log_messages + assert ( + "[dialect redshift+psycopg2 does not support caching 0.00016s] {'email': ('atestingemail@email.com',)" + not in log_messages + )