use logger with minimized cardinality (#193)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
rreinoldsc and pre-commit-ci[bot] committed Apr 26, 2024
1 parent e1a6d2c commit 2881921
Showing 4 changed files with 112 additions and 25 deletions.
51 changes: 42 additions & 9 deletions great_expectations_cloud/agent/agent.py
@@ -55,6 +55,8 @@
from great_expectations_cloud.agent.actions.agent_action import ActionResult

LOGGER: Final[logging.Logger] = logging.getLogger(__name__)
+# TODO Set in log dict
+LOGGER.setLevel(logging.INFO)
HandlerMap = Dict[str, OnMessageCallback]
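
Note: the TODO above presumably refers to moving the log level into the dictConfig-based setup in logging_cfg.py instead of hard-coding it on the module logger. A minimal sketch of that alternative (keys illustrative, not the project's actual config):

import logging.config

logging.config.dictConfig(
    {
        "version": 1,
        "disable_existing_loggers": False,
        "loggers": {
            "great_expectations_cloud": {"level": "INFO"},
        },
    }
)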


@@ -201,6 +203,13 @@ def _handle_event(self, event_context: EventContext) -> ActionResult:
# warning: this method will not be executed in the main thread
self._update_status(job_id=event_context.correlation_id, status=JobStarted())
print(f"Starting job {event_context.event.type} ({event_context.correlation_id}) ")
+LOGGER.info(
+    "Starting job",
+    extra={
+        "event_type": event_context.event.type,
+        "correlation_id": event_context.correlation_id,
+    },
+)
handler = EventHandler(context=self._context)
# This method might raise an exception. Allow it and handle in _handle_event_as_thread_exit
result = handler.handle_event(event=event_context.event, id=event_context.correlation_id)
@@ -228,20 +237,34 @@ def _handle_event_as_thread_exit(
created_resources=[],
error_stack_trace="The version of the GX Agent you are using does not support this functionality. Please upgrade to latest.",
)
-print(
-    f"Job completed with error: {event_context.event.type} ({event_context.correlation_id}). Ensure agent is up-to-date."
+LOGGER.error(
+    "Job completed with error. Ensure agent is up-to-date.",
+    extra={
+        "event_type": event_context.event.type,
+        "id": event_context.correlation_id,
+    },
)
else:
status = JobCompleted(
success=True,
created_resources=result.created_resources,
)
print(f"Completed job: {event_context.event.type} ({event_context.correlation_id})")
LOGGER.info(
"Completed job",
extra={
"event_type": event_context.event.type,
"correlation_id": event_context.correlation_id,
},
)
else:
status = build_failed_job_completed_status(error)
-print(traceback.format_exc())
-print(
-    f"Job completed with error: {event_context.event.type} ({event_context.correlation_id})"
+LOGGER.info(traceback.format_exc())
+LOGGER.info(
+    "Job completed with error",
+    extra={
+        "event_type": event_context.event.type,
+        "correlation_id": event_context.correlation_id,
+    },
)

self._update_status(job_id=event_context.correlation_id, status=status)
@@ -322,7 +345,7 @@ def _update_status(self, job_id: str, status: JobStatus) -> None:
job_id: job identifier, also known as correlation_id
status: pydantic model encapsulating the current status
"""
LOGGER.info(f"Updating status: {job_id} - {status}")
LOGGER.info("Updating status", extra={"job_id": job_id, "status": str(status)})
agent_sessions_url = (
f"{self._config.gx_cloud_base_url}/organizations/{self._config.gx_cloud_organization_id}"
+ f"/agent-jobs/{job_id}"
@@ -349,13 +372,23 @@ def _set_http_session_headers(self, correlation_id: str | None = None) -> None:
):
# TODO: public API should be available in v1
LOGGER.info(
f"Unable to set {HeaderName.USER_AGENT} or {HeaderName.AGENT_JOB_ID} header for requests to GX Cloud"
"Unable to set header for requests to GX Cloud",
extra={
"user_agent": HeaderName.USER_AGENT,
"agent_job_id": HeaderName.AGENT_JOB_ID,
},
)
return

agent_version = self.get_current_gx_agent_version()
LOGGER.debug(
f"Setting session headers for GX Cloud. {HeaderName.USER_AGENT}:{agent_version} {HeaderName.AGENT_JOB_ID}:{correlation_id}"
"Setting session headers for GX Cloud",
extra={
"user_agent": HeaderName.USER_AGENT,
"agent_version": agent_version,
"job_id": HeaderName.AGENT_JOB_ID,
"correlation_id": correlation_id,
},
)

if correlation_id:
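
Note on the pattern in this file: the print calls interpolated job-specific values into the message itself, so every job produced a distinct log line. The LOGGER calls keep the message constant ("Starting job", "Completed job", "Updating status") and carry the variable data in extra, which the logging module copies onto the LogRecord as attributes; the JSONFormatter below then emits them as structured fields. A minimal sketch of the pattern in isolation (values hypothetical):

import logging

logger = logging.getLogger(__name__)

# High cardinality: the message string differs for every job.
# logger.info(f"Starting job {event_type} ({correlation_id})")

# Low cardinality: one constant message; variable data travels in `extra`
# and is attached to the LogRecord as attributes.
logger.info(
    "Starting job",
    extra={"event_type": "run_checkpoint", "correlation_id": "abc-123"},
)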
50 changes: 38 additions & 12 deletions great_expectations_cloud/logging/logging_cfg.py
@@ -8,7 +8,7 @@
import logging.handlers
import pathlib
from datetime import datetime, timezone
-from typing import Any, Final, Literal
+from typing import Any, ClassVar, Final, Literal

from typing_extensions import override

@@ -137,8 +137,25 @@ class JSONFormatter(logging.Formatter):
"""
All custom formatting is done through subclassing this Formatter class
Note: Defined within fn bc parametrization of Formatters is not supported by dictConfig
"""

+_SKIP_KEYS: ClassVar[frozenset[str]] = frozenset(
+    [
+        "exc_text",
+        "levelno",
+        "lineno",
+        "msecs",
+        "msg",
+        "name",
+        "pathname",
+        "process",
+        "processName",
+        "thread",
+        "threadName",
+    ]
+)

def __init__(
self,
fmt: str | None = None,
@@ -155,19 +172,28 @@ def __init__(

@override
def format(self, record: logging.LogRecord) -> str:
-optionals = {}
+log_full = record.__dict__

+log_full["event"] = record.msg
+log_full["level"] = record.levelname
+log_full["logger"] = record.name
+log_full["timestamp"] = datetime.fromtimestamp(record.created, tz=timezone.utc).isoformat()

-base_tags = {
-    "event": record.msg,
-    "level": record.levelname,
-    "logger": record.name,
-    "timestamp": datetime.fromtimestamp(record.created, tz=timezone.utc).isoformat(),
-}
if record.exc_info:
-    optionals["exc_info"] = str(record.exc_info)
-if record.stack_info:
-    optionals["stack_info"] = record.stack_info
+    log_full["exc_info"] = str(record.exc_info)

-complete_dict = {**base_tags, **self.custom_tags, **optionals}
+if record.args:
+    log_full["args"] = str(record.args)

+log_subset = {
+    key: value
+    for key, value in log_full.items()
+    if key is not None and key not in self._SKIP_KEYS
+}

+complete_dict = {
+    **log_subset,
+    **self.custom_tags,
+}

return json.dumps(complete_dict)
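
End to end, a record logged with extra fields comes out as one JSON object: the record's __dict__ is copied into log_full, the stable event/level/logger/timestamp keys are set, the attributes listed in _SKIP_KEYS are dropped, and custom_tags are merged last. A minimal usage sketch (assuming the formatter above; logger name and output abridged and illustrative):

import logging

from great_expectations_cloud.logging.logging_cfg import JSONFormatter

handler = logging.StreamHandler()
handler.setFormatter(JSONFormatter())
logger = logging.getLogger("demo")
logger.setLevel(logging.INFO)
logger.addHandler(handler)

logger.info("Starting job", extra={"correlation_id": "abc-123"})
# Emits one JSON line, roughly:
# {"event": "Starting job", "level": "INFO", "logger": "demo",
#  "timestamp": "2024-04-26T00:00:00+00:00", "correlation_id": "abc-123", ...}
# Other non-skipped LogRecord attributes (e.g. "filename", "funcName")
# appear as well.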
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "great_expectations_cloud"
version = "20240425.0"
version = "20240426.0"
description = "Great Expectations Cloud"
authors = ["The Great Expectations Team <team@greatexpectations.io>"]
repository = "https://github.com/great-expectations/cloud"
34 changes: 31 additions & 3 deletions tests/test_logging_cfg.py
@@ -5,6 +5,7 @@
import uuid
from logging import makeLogRecord
from pathlib import Path
+from typing import Any

import freezegun
import pytest
@@ -39,6 +40,13 @@ def logfile_path():
return Path(DEFAULT_LOG_DIR, DEFAULT_LOG_FILE)


+def is_subset(small_set: dict[str, Any], big_set: dict[str, Any]):
+    """
+    Return True if every key of small_set is also present in big_set (values are not compared).
+    """
+    return not bool(set(small_set) - set(big_set))


class TestLogLevel:
@pytest.mark.parametrize(
"log_level",
@@ -80,7 +88,7 @@ def test_json_formatter(custom_tags)

expected = {**default_log_formatted, **custom_tags}

-assert actual == expected
+assert is_subset(expected, actual)


@freezegun.freeze_time(TIMESTAMP)
@@ -90,7 +98,27 @@ def test_json_formatter_exc_info():
log_record = makeLogRecord({**default_log_emitted, "exc_info": (1, 2, 3)})
out_str = fmt.format(log_record)
actual = json.loads(out_str)
-assert actual == expected
+assert is_subset(expected, actual)


+@freezegun.freeze_time(TIMESTAMP)
+def test_json_formatter_extra():
+    expected = {**default_log_formatted, "user": "123"}
+    fmt = JSONFormatter()
+    # Note: makeLogRecord does not account for the extra kwarg
+    log_record = logging.getLogger(default_log_emitted["name"]).makeRecord(
+        default_log_emitted["name"],
+        logging.DEBUG,
+        "fn_name",
+        0,
+        default_log_emitted["msg"],
+        (),
+        None,
+        extra={"user": "123"},
+    )
+    out_str = fmt.format(log_record)
+    actual = json.loads(out_str)
+    assert is_subset(expected, actual)


@freezegun.freeze_time(TIMESTAMP)
@@ -101,7 +129,7 @@ def test_json_formatter_stack_info():
log_record = makeLogRecord({**default_log_emitted, "stack_info": stack_info})
out_str = fmt.format(log_record)
actual = json.loads(out_str)
-assert actual == expected
+assert is_subset(expected, actual)


def test_logfile(fs, logfile_path):
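
One caveat on the updated assertions: is_subset compares key sets only, so these tests verify that every expected key appears in the formatter output, not that the values match. A small illustration (data hypothetical):

assert is_subset({"a": 1}, {"a": 999, "b": 2})  # True: key "a" present; values ignored
assert not is_subset({"c": 1}, {"a": 1, "b": 2})  # False: key "c" missing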
