feat: implement support for structured logging (#7126)
* feat: implement support for structured logging

* docs: add release notes

* style: add explanatory comment

* chore: test + import fixes

* tests: fix windows tests
wochinge committed Feb 27, 2024
1 parent e0b6924 commit 2a59128
Showing 6 changed files with 329 additions and 2 deletions.
5 changes: 5 additions & 0 deletions haystack/__init__.py
@@ -3,6 +3,11 @@
from haystack.core.pipeline import Pipeline
from haystack.core.serialization import default_from_dict, default_to_dict
from haystack.dataclasses import Answer, Document, ExtractedAnswer, GeneratedAnswer
import haystack.logging

# Initialize the logging configuration
# This is a no-op unless `structlog` is installed
haystack.logging.configure_logging()

__all__ = [
"component",
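Because `configure_logging()` now runs at import time, plain stdlib logging picks up the structured rendering automatically whenever `structlog` is available. A minimal sketch of the resulting default behavior (the logger name and key-value pairs are illustrative, not from the diff):

```python
import logging

import haystack  # importing haystack calls configure_logging(); a no-op if structlog is missing

logger = logging.getLogger(__name__)

# With structlog installed, the key-value pairs passed via `extra` are rendered
# alongside the message by structlog's console renderer; without structlog this
# behaves like ordinary stdlib logging.
logger.warning("Indexing finished", extra={"document_count": 42})
```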
97 changes: 97 additions & 0 deletions haystack/logging.py
@@ -0,0 +1,97 @@
import logging
import os
import typing
from typing import List

if typing.TYPE_CHECKING:
    from structlog.typing import Processor

HAYSTACK_LOGGING_USE_JSON_ENV_VAR = "HAYSTACK_LOGGING_USE_JSON"
HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR = "HAYSTACK_LOGGING_IGNORE_STRUCTLOG"


def configure_logging(use_json: bool = False) -> None:
"""Configure logging for Haystack.
- If `structlog` is not installed, we keep everything as it is. The user is responsible for configuring logging
themselves.
- If `structlog` is installed, we configure it to format log entries including its key-value data. To disable this
behavior set the environment variable `HAYSTACK_LOGGING_IGNORE_STRUCTLOG` to `true`.
- If `structlog` is installed, you can JSON format all logs. Enable this by
- setting the `use_json` parameter to `True` when calling this function
- setting the environment variable `HAYSTACK_LOGGING_USE_JSON` to `true`
"""
    try:
        import structlog
        from structlog.processors import ExceptionRenderer
        from structlog.tracebacks import ExceptionDictTransformer

    except ImportError:
        # structlog is not installed - fall back to standard logging
        return

    if os.getenv(HAYSTACK_LOGGING_IGNORE_STRUCTLOG_ENV_VAR, "false").lower() == "true":
        # If the user wants to ignore structlog, we don't configure it and fall back to standard logging
        return

    # We roughly follow the structlog documentation here:
    # https://www.structlog.org/en/stable/standard-library.html#rendering-using-structlog-based-formatters-within-logging
    # This means that we use structlog to format entries emitted via both `logging` and `structlog`.

    shared_processors: List[Processor] = [
        # Add the log level to the event_dict for structlog to use
        structlog.stdlib.add_log_level,
        # Add the current timestamp in ISO format to logs
        structlog.processors.TimeStamper(fmt="iso"),
    ]

    structlog.configure(
        processors=shared_processors + [structlog.stdlib.ProcessorFormatter.wrap_for_formatter],
        logger_factory=structlog.stdlib.LoggerFactory(),
        cache_logger_on_first_use=True,
        # Filter out log entries that are below the effective log level of the root logger.
        wrapper_class=structlog.make_filtering_bound_logger(min_level=logging.root.getEffectiveLevel()),
    )

    renderers: List[Processor]
    if os.getenv(HAYSTACK_LOGGING_USE_JSON_ENV_VAR, "false").lower() == "true" or use_json:
        renderers = [
            ExceptionRenderer(
                # don't show locals in production logs - this can be quite sensitive information
                ExceptionDictTransformer(show_locals=False)
            ),
            structlog.processors.JSONRenderer(),
        ]
    else:
        renderers = [structlog.dev.ConsoleRenderer()]

    formatter = structlog.stdlib.ProcessorFormatter(
        # These run ONLY on `logging` entries that do NOT originate within structlog.
        foreign_pre_chain=shared_processors
        + [
            # Add the information from the `logging` `extra` keyword arguments to the event dictionary
            structlog.stdlib.ExtraAdder()
        ],
        # These run on ALL entries after the pre_chain is done.
        processors=[
            # Remove `_record` and `_from_structlog` so that this metadata is not added to the final log record
            structlog.stdlib.ProcessorFormatter.remove_processors_meta,
            *renderers,
        ],
    )

    handler = logging.StreamHandler()
    handler.name = "HaystackLoggingHandler"
    # Use OUR `ProcessorFormatter` to format all `logging` entries.
    handler.setFormatter(formatter)

    root_logger = logging.getLogger()
    # Avoid adding our handler twice if configure_logging() is called again
    old_handlers = [
        h
        for h in root_logger.handlers
        if not (isinstance(h, logging.StreamHandler) and h.name == "HaystackLoggingHandler")
    ]
    new_handlers = [handler, *old_handlers]
    root_logger.handlers = new_handlers
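A short sketch of the three configuration paths described in the docstring above, assuming `structlog` is installed (the log message and keys are illustrative):

```python
import logging

import haystack.logging

# Render all logs as JSON, either via the parameter...
haystack.logging.configure_logging(use_json=True)

# ...or equivalently by exporting HAYSTACK_LOGGING_USE_JSON=true before calling
# configure_logging() with no arguments.
#
# Exporting HAYSTACK_LOGGING_IGNORE_STRUCTLOG=true instead makes configure_logging()
# a no-op, leaving the standard logging configuration untouched.

logging.getLogger(__name__).warning("Hello, structured logging!", extra={"key1": "value1"})
```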
3 changes: 3 additions & 0 deletions pyproject.toml
@@ -122,6 +122,9 @@ extra-dependencies = [
  # Tracing
  "opentelemetry-sdk",
  "ddtrace",

  # Structured logging
  "structlog",
]

[tool.hatch.envs.test.scripts]
18 changes: 18 additions & 0 deletions releasenotes/notes/structured-logging-2d6cef3fee2b4f0e.yaml
@@ -0,0 +1,18 @@
---
features:
  - |
    Haystack now supports structured logging out of the box.
    Logging now behaves in one of three ways:
    - If [`structlog`](https://www.structlog.org/en/stable/) is not installed, Haystack will
      use the standard Python logging library with whatever configuration is present.
    - If `structlog` is installed, Haystack will log through [`structlog`](https://www.structlog.org/en/stable/) using
      structlog's console renderer.
      To disable structlog, set the environment variable `HAYSTACK_LOGGING_IGNORE_STRUCTLOG` to `true`.
    - To log in JSON, install [`structlog`](https://www.structlog.org/en/stable/) and either
      - set the environment variable `HAYSTACK_LOGGING_USE_JSON` to `true`, or
      - enable JSON logging from Python:
        ```python
        import haystack.logging

        haystack.logging.configure_logging(use_json=True)
        ```
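Roughly what the JSON mode emits per log call, assuming `structlog` is installed; the expected shape is taken from the assertions in the tests below, while the timestamp value and key order are illustrative:

```python
import logging

import haystack.logging

haystack.logging.configure_logging(use_json=True)
logging.getLogger(__name__).warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

# Expected output on stderr, one JSON object per log entry, roughly:
# {"key1": "value1", "key2": "value2", "event": "Hello, structured logging!",
#  "level": "warning", "timestamp": "2024-02-27T12:00:00.000000Z"}
```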
206 changes: 206 additions & 0 deletions test/test_logging.py
@@ -0,0 +1,206 @@
import builtins
import json
import logging
import os
import sys
from datetime import datetime, timezone
from pathlib import Path
from unittest.mock import ANY, Mock

import pytest
from _pytest.capture import CaptureFixture
from _pytest.logging import LogCaptureFixture
from _pytest.monkeypatch import MonkeyPatch

from haystack import logging as haystack_logging


@pytest.fixture(autouse=True)
def reset_logging_config() -> None:
    old_handlers = logging.root.handlers.copy()
    yield
    # Reset the logging configuration after each test to avoid impacting other tests
    logging.root.handlers = old_handlers


class TestSkipLoggingConfiguration:
    def test_skip_logging_configuration(
        self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture
    ) -> None:
        monkeypatch.setenv("HAYSTACK_LOGGING_IGNORE_STRUCTLOG", "true")
        haystack_logging.configure_logging()

        logger = logging.getLogger(__name__)
        logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

        # the pytest fixture caplog only captures logs being rendered from the stdlib logging module
        assert caplog.messages == ["Hello, structured logging!"]

        # Nothing should be captured by capfd since structlog is not configured
        assert capfd.readouterr().err == ""

    def test_skip_logging_if_structlog_not_installed(
        self, monkeypatch: MonkeyPatch, capfd: CaptureFixture, caplog: LogCaptureFixture
    ) -> None:
        monkeypatch.delitem(sys.modules, "structlog", raising=False)
        monkeypatch.setattr(builtins, "__import__", Mock(side_effect=ImportError))

        haystack_logging.configure_logging()

        logger = logging.getLogger(__name__)
        logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

        # the pytest fixture caplog only captures logs being rendered from the stdlib logging module
        assert caplog.messages == ["Hello, structured logging!"]

        # Nothing should be captured by capfd since structlog is not configured
        assert capfd.readouterr().err == ""


class TestStructuredLoggingConsoleRendering:
    def test_log_filtering_when_using_debug(self, capfd: CaptureFixture) -> None:
        haystack_logging.configure_logging()

        logger = logging.getLogger(__name__)
        logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

        # Use `capfd` to capture the output of the final structlog rendering result
        output = capfd.readouterr().err
        assert output == ""

    def test_log_filtering_when_using_debug_and_log_level_is_debug(self, capfd: CaptureFixture) -> None:
        haystack_logging.configure_logging()

        logger = logging.getLogger(__name__)
        logger.setLevel(logging.DEBUG)

        logger.debug("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

        # Use `capfd` to capture the output of the final structlog rendering result
        output = capfd.readouterr().err
        assert output != ""

    def test_console_rendered_structured_log(self, capfd: CaptureFixture) -> None:
        haystack_logging.configure_logging()

        logger = logging.getLogger(__name__)
        logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

        # Use `capfd` to capture the output of the final structlog rendering result
        output = capfd.readouterr().err

        # Only check for the minute to be a bit more robust
        today = datetime.now(tz=timezone.utc).isoformat(timespec="minutes").replace("+00:00", "")
        assert today in output

        log_level = "warning"
        assert log_level in output

        assert "Hello, structured logging!" in output

        assert "key1" in output
        assert "value1" in output

    def test_logging_exceptions(self, capfd: CaptureFixture) -> None:
        haystack_logging.configure_logging()

        logger = logging.getLogger(__name__)

        def function_that_raises_and_adds_to_stack_trace():
            raise ValueError("This is an error")

        try:
            function_that_raises_and_adds_to_stack_trace()
        except ValueError:
            logger.exception("An error happened")

        # Use `capfd` to capture the output of the final structlog rendering result
        output = capfd.readouterr().err

        assert "An error happened" in output


class TestStructuredLoggingJSONRendering:
    def test_logging_as_json(self, capfd: CaptureFixture) -> None:
        haystack_logging.configure_logging(use_json=True)

        logger = logging.getLogger(__name__)
        logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

        # Use `capfd` to capture the output of the final structlog rendering result
        output = capfd.readouterr().err
        parsed_output = json.loads(output)  # should not raise an error

        assert parsed_output == {
            "event": "Hello, structured logging!",
            "key1": "value1",
            "key2": "value2",
            "level": "warning",
            "timestamp": ANY,
        }

    def test_logging_as_json_enabling_via_env(self, capfd: CaptureFixture, monkeypatch: MonkeyPatch) -> None:
        monkeypatch.setenv("HAYSTACK_LOGGING_USE_JSON", "true")
        haystack_logging.configure_logging()

        logger = logging.getLogger(__name__)
        logger.warning("Hello, structured logging!", extra={"key1": "value1", "key2": "value2"})

        # Use `capfd` to capture the output of the final structlog rendering result
        output = capfd.readouterr().err
        parsed_output = json.loads(output)  # should not raise an error

        assert parsed_output == {
            "event": "Hello, structured logging!",
            "key1": "value1",
            "key2": "value2",
            "level": "warning",
            "timestamp": ANY,
        }

    def test_logging_exceptions_json(self, capfd: CaptureFixture) -> None:
        haystack_logging.configure_logging(use_json=True)

        logger = logging.getLogger(__name__)

        def function_that_raises_and_adds_to_stack_trace():
            my_local_variable = "my_local_variable"  # noqa: F841
            raise ValueError("This is an error")

        try:
            function_that_raises_and_adds_to_stack_trace()
        except ValueError:
            logger.exception("An error happened ")

        # Use `capfd` to capture the output of the final structlog rendering result
        output = capfd.readouterr().err
        parsed_output = json.loads(output)
        assert parsed_output == {
            "event": "An error happened ",
            "level": "error",
            "timestamp": ANY,
            "exception": [
                {
                    "exc_type": "ValueError",
                    "exc_value": "This is an error",
                    "syntax_error": None,
                    "is_cause": False,
                    "frames": [
                        {
                            "filename": str(Path.cwd() / "test" / "test_logging.py"),
                            "lineno": ANY,  # otherwise the test breaks if you add a line :-)
                            "name": "test_logging_exceptions_json",
                            "line": "",
                            "locals": None,
                        },
                        {
                            "filename": str(Path.cwd() / "test" / "test_logging.py"),
                            "lineno": ANY,  # otherwise the test breaks if you add a line :-)
                            "name": "function_that_raises_and_adds_to_stack_trace",
                            "line": "",
                            "locals": None,
                        },
                    ],
                }
            ],
        }
2 changes: 0 additions & 2 deletions test/tracing/test_datadog.py
@@ -17,8 +17,6 @@ def datadog_tracer(monkeypatch: MonkeyPatch) -> ddtrace.Tracer:

    tracer = ddtrace.Tracer()

-    # monkeypatch.setattr(ddtrace, "tracer", tracer)
-
    return tracer

