Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ Changes can also be flagged with a GitHub label for tracking purposes. The URL o
### Fixed
- Fixed GTM integration to properly handle duplicate notice keys [#6090](https://github.com/ethyca/fides/pull/6090)
- Fix Special-purpose only vendors not correctly encoded in TC string [#6086](https://github.com/ethyca/fides/pull/6086)
- Suppressing SQLAlchemy logging related to caching queries [#6089](https://github.com/ethyca/fides/pull/6089)

## [2.60.0](https://github.com/ethyca/fides/compare/2.59.2...2.60.0)

Expand Down
6 changes: 6 additions & 0 deletions src/fides/api/util/logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from loguru._handler import Message

from fides.api.schemas.privacy_request import LogEntry, PrivacyRequestSource
from fides.api.util.sqlalchemy_filter import SQLAlchemyGeneratedFilter
from fides.config import CONFIG, FidesConfig

if TYPE_CHECKING:
Expand Down Expand Up @@ -160,6 +161,11 @@ def setup(config: FidesConfig) -> None:
logging.getLogger(name).handlers = []
logging.getLogger(name).propagate = True

# Apply filter to remove generated timing logs
sqlalchemy_generated_filter = SQLAlchemyGeneratedFilter()
sqlalchemy_engine_logger = logging.getLogger("sqlalchemy.engine.Engine")
sqlalchemy_engine_logger.addFilter(sqlalchemy_generated_filter)

handlers = []

# Configure main sink from config
Expand Down
30 changes: 30 additions & 0 deletions src/fides/api/util/sqlalchemy_filter.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
import logging


class SQLAlchemyGeneratedFilter(logging.Filter):
"""
Logging filter that removes SQLAlchemy engine logs containing timing information.

This filter specifically targets logs containing error messages from sqlalchemy.engine.Engine
which typically come from sqlalchemy.engine.Engine when executing cached queries.
"""

def __init__(self) -> None:
super().__init__()
# List of substrings to filter out
self.patterns = [
"cached since",
"caching disabled",
"does not support caching",
Comment on lines +16 to +18
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

How certain are we that these are the only caching related substrings we need to catch? Are there any other substrings related to PII we might want to guard against (that may or may not be happening now?)

Copy link
Copy Markdown
Contributor Author

@Vagoasdf Vagoasdf Apr 25, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The one we detected during this Issue was related to caching strategies for third party dialects. Docs over here: https://docs.sqlalchemy.org/en/14/core/connections.html#engine-thirdparty-caching point to the following structure for this error

For all third party dialects that don’t support this attribute, the logging for such a dialect will indicate dialect does not support caching.

cached since and caching disabled are failsafe nets for similar error that might contain PII, but haven't showed on the logs yet

]

def filter(self, record: logging.LogRecord) -> bool:
# Only filter records from sqlalchemy.engine.Engine
if record.name == "sqlalchemy.engine.Engine":
message = record.getMessage()
# Check if any of the patterns exist in the message
for pattern in self.patterns:
if pattern in message:
return False # Filter out the log entry

return True # Keep all other log entries
56 changes: 56 additions & 0 deletions tests/ops/util/test_logger.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import logging
import os
from datetime import datetime

Expand All @@ -15,6 +16,7 @@
_log_warning,
suppress_logging,
)
from fides.api.util.sqlalchemy_filter import SQLAlchemyGeneratedFilter
from fides.config import CONFIG


Expand Down Expand Up @@ -165,3 +167,57 @@ def test_suppress_logging_exception_safety(self, loguru_caplog):
assert logger._core.min_level == original_min_level
assert "This message should be suppressed" not in loguru_caplog.text
assert "This message should appear in logs" in loguru_caplog.text


@pytest.mark.unit
class TestSQLAlchemyLogger:
def test_sqlalchemy_logging_filter(self, loguru_caplog):
"""Test that the SQLAlchemyGeneratedFilter correctly filters out unwanted log messages."""

# Create a logger for SQLAlchemy
sqlalchemy_logger = logging.getLogger("sqlalchemy.engine.Engine")
sqlalchemy_logger.setLevel(logging.INFO)

# Add the SQLAlchemyGeneratedFilter
filter = SQLAlchemyGeneratedFilter()
sqlalchemy_logger.addFilter(filter)

# Log messages that should be filtered out
sqlalchemy_logger.info(
"This message was cached since yesterday and should be filtered"
)
sqlalchemy_logger.info(
"This message indicates caching disabled and should be filtered"
)
sqlalchemy_logger.info(
"[dialect redshift+psycopg2 does not support caching 0.00016s] {'email': ('atestingemail@email.com',)"
)

# Log a message that should not be filtered out
sqlalchemy_logger.info("This message should appear in logs")

# Check that only the message that should not be filtered appears in the logs
log_messages = loguru_caplog.text
assert "This message should appear in logs" in log_messages
assert "This message contains no key and should be filtered" not in log_messages
assert (
"This message was cached since yesterday and should be filtered"
not in log_messages
)
assert (
"This message was generated in 0.001s and should be filtered"
not in log_messages
)
assert (
"This message indicates caching disabled and should be filtered"
not in log_messages
)
assert (
"This message does not support caching and should be filtered"
not in log_messages
)
assert "This message is unknown and should be filtered" not in log_messages
assert (
"[dialect redshift+psycopg2 does not support caching 0.00016s] {'email': ('atestingemail@email.com',)"
not in log_messages
)