From 085068290e200eb8a955ce82b652f0a2d384444e Mon Sep 17 00:00:00 2001 From: bruno Date: Tue, 25 Mar 2025 15:46:26 -0300 Subject: [PATCH 1/6] Testing suppres logging --- src/fides/api/service/connectors/redshift_connector.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/src/fides/api/service/connectors/redshift_connector.py b/src/fides/api/service/connectors/redshift_connector.py index 14e149770a4..fe945d64d97 100644 --- a/src/fides/api/service/connectors/redshift_connector.py +++ b/src/fides/api/service/connectors/redshift_connector.py @@ -1,5 +1,7 @@ from typing import Dict, Union from urllib.parse import quote_plus +import logging + from loguru import logger from sqlalchemy import text @@ -76,11 +78,13 @@ def create_client(self) -> Engine: def set_schema(self, connection: Connection) -> None: """Sets the search_path for the duration of the session""" config = self.secrets_schema(**self.configuration.secrets or {}) + if config.db_schema: logger.info("Setting Redshift search_path before retrieving data") - stmt = text("SET search_path to :search_path") - stmt = stmt.bindparams(search_path=config.db_schema) - connection.execute(stmt) + with suppress_logging(): + stmt = text("SET search_path to :search_path") + stmt = stmt.bindparams(search_path=config.db_schema) + connection.execute(stmt) # Overrides SQLConnector.query_config def query_config(self, node: ExecutionNode) -> RedshiftQueryConfig: From dc257f6ed658e7069f77b8890da8834335591082 Mon Sep 17 00:00:00 2001 From: bruno Date: Thu, 24 Apr 2025 14:47:12 -0400 Subject: [PATCH 2/6] Base patch from Adrian testing patch --- src/fides/api/util/logger.py | 6 +++++ src/fides/api/util/sqlalchemy_filter.py | 33 +++++++++++++++++++++++++ 2 files changed, 39 insertions(+) create mode 100644 src/fides/api/util/sqlalchemy_filter.py diff --git a/src/fides/api/util/logger.py b/src/fides/api/util/logger.py index 515ef1ae0c3..dc9495a0bd1 100644 --- a/src/fides/api/util/logger.py +++ b/src/fides/api/util/logger.py @@ -15,6 +15,7 @@ from loguru._handler import Message from fides.api.schemas.privacy_request import LogEntry, PrivacyRequestSource +from fides.api.util.sqlalchemy_filter import SQLAlchemyGeneratedFilter from fides.config import CONFIG, FidesConfig if TYPE_CHECKING: @@ -160,6 +161,11 @@ def setup(config: FidesConfig) -> None: logging.getLogger(name).handlers = [] logging.getLogger(name).propagate = True + # Apply filter to remove generated timing logs + sqlalchemy_generated_filter = SQLAlchemyGeneratedFilter() + sqlalchemy_engine_logger = logging.getLogger("sqlalchemy.engine.Engine") + sqlalchemy_engine_logger.addFilter(sqlalchemy_generated_filter) + handlers = [] # Configure main sink from config diff --git a/src/fides/api/util/sqlalchemy_filter.py b/src/fides/api/util/sqlalchemy_filter.py new file mode 100644 index 00000000000..21625db0614 --- /dev/null +++ b/src/fides/api/util/sqlalchemy_filter.py @@ -0,0 +1,33 @@ +import logging + + +class SQLAlchemyGeneratedFilter(logging.Filter): + """ + Logging filter that removes SQLAlchemy engine logs containing timing information. + + This filter specifically targets logs containing timing information patterns + which typically come from sqlalchemy.engine.Engine when displaying query execution times. + """ + + def __init__(self): + super().__init__() + # List of substrings to filter out + self.patterns = [ + "no key", + "cached since", + "generated in", + "caching disabled", + "does not support caching", + "unknown", + ] + + def filter(self, record): + # Only filter records from sqlalchemy.engine.Engine + if record.name == "sqlalchemy.engine.Engine": + message = record.getMessage() + # Check if any of the patterns exist in the message + for pattern in self.patterns: + if pattern in message: + return False # Filter out the log entry + + return True # Keep all other log entries From 5e1238bc0221bbfbfcea1d22ba2918bd55d104d0 Mon Sep 17 00:00:00 2001 From: bruno Date: Thu, 24 Apr 2025 15:59:46 -0400 Subject: [PATCH 3/6] Reducing error list filtering to cache only Adding testing --- src/fides/api/util/sqlalchemy_filter.py | 7 ++--- tests/ops/util/test_logger.py | 35 +++++++++++++++++++++++++ 2 files changed, 37 insertions(+), 5 deletions(-) diff --git a/src/fides/api/util/sqlalchemy_filter.py b/src/fides/api/util/sqlalchemy_filter.py index 21625db0614..6c3ff2f1ce3 100644 --- a/src/fides/api/util/sqlalchemy_filter.py +++ b/src/fides/api/util/sqlalchemy_filter.py @@ -5,20 +5,17 @@ class SQLAlchemyGeneratedFilter(logging.Filter): """ Logging filter that removes SQLAlchemy engine logs containing timing information. - This filter specifically targets logs containing timing information patterns - which typically come from sqlalchemy.engine.Engine when displaying query execution times. + This filter specifically targets logs containing error messages from sqlalchemy.engine.Engine + which typically come from sqlalchemy.engine.Engine when executing cached queries. """ def __init__(self): super().__init__() # List of substrings to filter out self.patterns = [ - "no key", "cached since", - "generated in", "caching disabled", "does not support caching", - "unknown", ] def filter(self, record): diff --git a/tests/ops/util/test_logger.py b/tests/ops/util/test_logger.py index 09b11b54997..477c64cb9fb 100644 --- a/tests/ops/util/test_logger.py +++ b/tests/ops/util/test_logger.py @@ -4,6 +4,7 @@ import pytest from loguru import logger from loguru._handler import Message +import logging from fides.api.schemas.privacy_request import PrivacyRequestSource from fides.api.util.cache import get_cache @@ -16,6 +17,7 @@ suppress_logging, ) from fides.config import CONFIG +from fides.api.util.sqlalchemy_filter import SQLAlchemyGeneratedFilter @pytest.mark.unit @@ -165,3 +167,36 @@ def test_suppress_logging_exception_safety(self, loguru_caplog): assert logger._core.min_level == original_min_level assert "This message should be suppressed" not in loguru_caplog.text assert "This message should appear in logs" in loguru_caplog.text + + +@pytest.mark.unit +class TestSQLAlchemyLogger: + def test_sqlalchemy_logging_filter(self, loguru_caplog): + """Test that the SQLAlchemyGeneratedFilter correctly filters out unwanted log messages.""" + + # Create a logger for SQLAlchemy + sqlalchemy_logger = logging.getLogger("sqlalchemy.engine.Engine") + sqlalchemy_logger.setLevel(logging.INFO) + + # Add the SQLAlchemyGeneratedFilter + filter = SQLAlchemyGeneratedFilter() + sqlalchemy_logger.addFilter(filter) + + # Log messages that should be filtered out + sqlalchemy_logger.info("This message was cached since yesterday and should be filtered") + sqlalchemy_logger.info("This message indicates caching disabled and should be filtered") + sqlalchemy_logger.info("[dialect redshift+psycopg2 does not support caching 0.00016s] {'email': ('atestingemail@email.com',)") + + # Log a message that should not be filtered out + sqlalchemy_logger.info("This message should appear in logs") + + # Check that only the message that should not be filtered appears in the logs + log_messages = loguru_caplog.text + assert "This message should appear in logs" in log_messages + assert "This message contains no key and should be filtered" not in log_messages + assert "This message was cached since yesterday and should be filtered" not in log_messages + assert "This message was generated in 0.001s and should be filtered" not in log_messages + assert "This message indicates caching disabled and should be filtered" not in log_messages + assert "This message does not support caching and should be filtered" not in log_messages + assert "This message is unknown and should be filtered" not in log_messages + assert "[dialect redshift+psycopg2 does not support caching 0.00016s] {'email': ('atestingemail@email.com',)" not in log_messages From b79dde4d3e2bafa6452d148e37eb347b3172c00d Mon Sep 17 00:00:00 2001 From: bruno Date: Thu, 24 Apr 2025 16:02:19 -0400 Subject: [PATCH 4/6] Revert "Testing suppres logging" This reverts commit 085068290e200eb8a955ce82b652f0a2d384444e. --- src/fides/api/service/connectors/redshift_connector.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/src/fides/api/service/connectors/redshift_connector.py b/src/fides/api/service/connectors/redshift_connector.py index fe945d64d97..14e149770a4 100644 --- a/src/fides/api/service/connectors/redshift_connector.py +++ b/src/fides/api/service/connectors/redshift_connector.py @@ -1,7 +1,5 @@ from typing import Dict, Union from urllib.parse import quote_plus -import logging - from loguru import logger from sqlalchemy import text @@ -78,13 +76,11 @@ def create_client(self) -> Engine: def set_schema(self, connection: Connection) -> None: """Sets the search_path for the duration of the session""" config = self.secrets_schema(**self.configuration.secrets or {}) - if config.db_schema: logger.info("Setting Redshift search_path before retrieving data") - with suppress_logging(): - stmt = text("SET search_path to :search_path") - stmt = stmt.bindparams(search_path=config.db_schema) - connection.execute(stmt) + stmt = text("SET search_path to :search_path") + stmt = stmt.bindparams(search_path=config.db_schema) + connection.execute(stmt) # Overrides SQLConnector.query_config def query_config(self, node: ExecutionNode) -> RedshiftQueryConfig: From e46c56d78d905631442c4095ac29677f9e2e9d25 Mon Sep 17 00:00:00 2001 From: bruno Date: Thu, 24 Apr 2025 16:32:25 -0400 Subject: [PATCH 5/6] running static checks --- src/fides/api/util/sqlalchemy_filter.py | 4 +-- tests/ops/util/test_logger.py | 41 +++++++++++++++++++------ 2 files changed, 33 insertions(+), 12 deletions(-) diff --git a/src/fides/api/util/sqlalchemy_filter.py b/src/fides/api/util/sqlalchemy_filter.py index 6c3ff2f1ce3..e020e949cc3 100644 --- a/src/fides/api/util/sqlalchemy_filter.py +++ b/src/fides/api/util/sqlalchemy_filter.py @@ -9,7 +9,7 @@ class SQLAlchemyGeneratedFilter(logging.Filter): which typically come from sqlalchemy.engine.Engine when executing cached queries. """ - def __init__(self): + def __init__(self) -> None: super().__init__() # List of substrings to filter out self.patterns = [ @@ -18,7 +18,7 @@ def __init__(self): "does not support caching", ] - def filter(self, record): + def filter(self, record: logging.LogRecord) -> bool: # Only filter records from sqlalchemy.engine.Engine if record.name == "sqlalchemy.engine.Engine": message = record.getMessage() diff --git a/tests/ops/util/test_logger.py b/tests/ops/util/test_logger.py index 477c64cb9fb..a152d31fda7 100644 --- a/tests/ops/util/test_logger.py +++ b/tests/ops/util/test_logger.py @@ -1,10 +1,10 @@ +import logging import os from datetime import datetime import pytest from loguru import logger from loguru._handler import Message -import logging from fides.api.schemas.privacy_request import PrivacyRequestSource from fides.api.util.cache import get_cache @@ -16,8 +16,8 @@ _log_warning, suppress_logging, ) -from fides.config import CONFIG from fides.api.util.sqlalchemy_filter import SQLAlchemyGeneratedFilter +from fides.config import CONFIG @pytest.mark.unit @@ -183,9 +183,15 @@ def test_sqlalchemy_logging_filter(self, loguru_caplog): sqlalchemy_logger.addFilter(filter) # Log messages that should be filtered out - sqlalchemy_logger.info("This message was cached since yesterday and should be filtered") - sqlalchemy_logger.info("This message indicates caching disabled and should be filtered") - sqlalchemy_logger.info("[dialect redshift+psycopg2 does not support caching 0.00016s] {'email': ('atestingemail@email.com',)") + sqlalchemy_logger.info( + "This message was cached since yesterday and should be filtered" + ) + sqlalchemy_logger.info( + "This message indicates caching disabled and should be filtered" + ) + sqlalchemy_logger.info( + "[dialect redshift+psycopg2 does not support caching 0.00016s] {'email': ('atestingemail@email.com',)" + ) # Log a message that should not be filtered out sqlalchemy_logger.info("This message should appear in logs") @@ -194,9 +200,24 @@ def test_sqlalchemy_logging_filter(self, loguru_caplog): log_messages = loguru_caplog.text assert "This message should appear in logs" in log_messages assert "This message contains no key and should be filtered" not in log_messages - assert "This message was cached since yesterday and should be filtered" not in log_messages - assert "This message was generated in 0.001s and should be filtered" not in log_messages - assert "This message indicates caching disabled and should be filtered" not in log_messages - assert "This message does not support caching and should be filtered" not in log_messages + assert ( + "This message was cached since yesterday and should be filtered" + not in log_messages + ) + assert ( + "This message was generated in 0.001s and should be filtered" + not in log_messages + ) + assert ( + "This message indicates caching disabled and should be filtered" + not in log_messages + ) + assert ( + "This message does not support caching and should be filtered" + not in log_messages + ) assert "This message is unknown and should be filtered" not in log_messages - assert "[dialect redshift+psycopg2 does not support caching 0.00016s] {'email': ('atestingemail@email.com',)" not in log_messages + assert ( + "[dialect redshift+psycopg2 does not support caching 0.00016s] {'email': ('atestingemail@email.com',)" + not in log_messages + ) From c3311766c12265da6d42851414529b4c9b54d901 Mon Sep 17 00:00:00 2001 From: bruno Date: Thu, 24 Apr 2025 16:42:08 -0400 Subject: [PATCH 6/6] Updated Changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 218d14b74fa..742a4da4b93 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ Changes can also be flagged with a GitHub label for tracking purposes. The URL o ### Fixed - Fix Special-purpose only vendors not correctly encoded in TC string [#6086](https://github.com/ethyca/fides/pull/6086) +- Suppressing SQLAlchemy logging related to caching queries [#6089](https://github.com/ethyca/fides/pull/6089) ## [2.60.0](https://github.com/ethyca/fides/compare/2.59.2...2.60.0)