Skip to content

Commit 72a905f

Browse files
committed
Convert server access log to structured context logging
Replace Apache Combined Log Format with structured key-value/JSON output using the existing LOG_FORMAT formatter system. Access log fields are fully configurable via SERVER_ACCESS_LOG_FIELDS, with a verbose default for production and a minimal set in plain dev.
1 parent 96ad632 commit 72a905f

File tree

6 files changed

+123
-113
lines changed

6 files changed

+123
-113
lines changed

plain-dev/plain/dev/core.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ def setup(
6969
self.plain_env = {
7070
"PYTHONUNBUFFERED": "true",
7171
"PLAIN_DEV": "true",
72+
"PLAIN_SERVER_ACCESS_LOG_FIELDS": '["method", "url", "status", "duration_ms", "size"]',
7273
"FORCE_COLOR": "1",
7374
"PYTHONWARNINGS": "default::DeprecationWarning,default::PendingDeprecationWarning",
7475
**os.environ,

plain/plain/logs/README.md

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -195,6 +195,20 @@ Valid log levels: `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`
195195

196196
The server writes access logs to a separate `plain.server.access` logger that always outputs to stdout, regardless of the `LOG_STREAM` setting. This keeps access logs separate from application logs.
197197

198+
Access logs use the same `LOG_FORMAT` setting as the app logger, producing structured output with request fields:
199+
200+
```
201+
[INFO] Request method=GET path="/" status=200 duration_ms=12 size=1234 ip="127.0.0.1" user_agent="Mozilla/5.0..." referer="https://example.com"
202+
```
203+
204+
In JSON format:
205+
206+
```json
207+
{"timestamp": "2024-01-15 10:30:00,123", "level": "INFO", "message": "Request", "logger": "plain.server.access", "method": "GET", "path": "/", "status": 200, "duration_ms": 12, "size": 1234, "ip": "127.0.0.1", "user_agent": "Mozilla/5.0...", "referer": "https://example.com"}
208+
```
209+
210+
Additional fields beyond the defaults are controlled by `SERVER_ACCESS_LOG_FIELDS` (see the server docs).
211+
198212
Access logging is controlled by the `SERVER_ACCESS_LOG` setting (see the server docs). Individual responses can opt out by setting `response.log_access = False`.
199213

200214
## FAQs

plain/plain/logs/configure.py

Lines changed: 15 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -30,6 +30,19 @@ def attach_log_handlers(
3030
logger.addHandler(warning_handler)
3131

3232

33+
def _create_app_formatter(app_log_format: str) -> logging.Formatter:
34+
"""Create a formatter based on the app log format setting."""
35+
match app_log_format:
36+
case "json":
37+
return JSONFormatter("%(json)s")
38+
case "keyvalue":
39+
return KeyValueFormatter("[%(levelname)s] %(message)s %(keyvalue)s")
40+
case _:
41+
raise ValueError(
42+
f"Invalid LOG_FORMAT: {app_log_format!r}. Must be 'keyvalue' or 'json'."
43+
)
44+
45+
3346
def configure_logging(
3447
*,
3548
plain_log_level: int | str,
@@ -66,15 +79,7 @@ def configure_logging(
6679
app_logger.propagate = False
6780

6881
# Determine formatter based on app_log_format
69-
match app_log_format:
70-
case "json":
71-
formatter = JSONFormatter("%(json)s")
72-
case "keyvalue":
73-
formatter = KeyValueFormatter("[%(levelname)s] %(message)s %(keyvalue)s")
74-
case _:
75-
raise ValueError(
76-
f"Invalid LOG_FORMAT: {app_log_format!r}. Must be 'keyvalue' or 'json'."
77-
)
82+
formatter = _create_app_formatter(app_log_format)
7883

7984
attach_log_handlers(
8085
logger=app_logger,
@@ -100,5 +105,5 @@ def configure_logging(
100105
server_access_logger.propagate = False
101106
if had_handlers:
102107
handler = logging.StreamHandler(sys.stdout)
103-
handler.setFormatter(logging.Formatter("%(message)s"))
108+
handler.setFormatter(_create_app_formatter(app_log_format))
104109
server_access_logger.addHandler(handler)

plain/plain/runtime/global_settings.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,17 @@
179179
SERVER_TIMEOUT: int = 30
180180
SERVER_MAX_REQUESTS: int = 0
181181
SERVER_ACCESS_LOG: bool = True
182+
SERVER_ACCESS_LOG_FIELDS: list[str] = [
183+
"method",
184+
"path",
185+
"query",
186+
"status",
187+
"duration_ms",
188+
"size",
189+
"ip",
190+
"user_agent",
191+
"referer",
192+
]
182193
SERVER_GRACEFUL_TIMEOUT: int = 30
183194
SERVER_SENDFILE: bool = True
184195
SERVER_FORWARDED_ALLOW_IPS: str = (

plain/plain/server/README.md

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -79,6 +79,7 @@ SERVER_THREADS = 4
7979
SERVER_TIMEOUT = 30
8080
SERVER_MAX_REQUESTS = 0 # 0 = disabled
8181
SERVER_ACCESS_LOG = True
82+
SERVER_ACCESS_LOG_FIELDS = ["method", "path", "query", "status", "duration_ms", "size", "ip", "user_agent", "referer"]
8283
SERVER_GRACEFUL_TIMEOUT = 30
8384
SERVER_SENDFILE = True
8485
SERVER_FORWARDED_ALLOW_IPS = "127.0.0.1,::1"
@@ -88,6 +89,34 @@ Settings can also be set via environment variables with the `PLAIN_` prefix (e.g
8889

8990
The `WEB_CONCURRENCY` environment variable is supported as an alias for `SERVER_WORKERS`.
9091

92+
### Access log format
93+
94+
Access logs use the same `LOG_FORMAT` setting as the app logger, so they produce structured output in key-value or JSON format:
95+
96+
```
97+
[INFO] Request method=GET path="/" status=200 duration_ms=12 size=1234 ip="127.0.0.1" user_agent="Mozilla/5.0..." referer="https://example.com"
98+
```
99+
100+
See the [logs docs](../logs/README.md) for details on output formats.
101+
102+
### Access log fields
103+
104+
`SERVER_ACCESS_LOG_FIELDS` controls exactly which fields appear in access log entries. The default includes all common fields:
105+
106+
```python
107+
# settings.py (default)
108+
SERVER_ACCESS_LOG_FIELDS = [
109+
"method", "path", "query", "status", "duration_ms", "size",
110+
"ip", "user_agent", "referer",
111+
]
112+
```
113+
114+
Available fields: `method`, `path`, `url`, `query`, `status`, `duration_ms`, `size`, `ip`, `user_agent`, `referer`, `protocol`.
115+
116+
Use `url` for a combined path + query string (e.g., `url="/search?q=hello"`). Use `path` and `query` separately for production log aggregation.
117+
118+
In development, `plain dev` sets a minimal field list for cleaner output (`method`, `url`, `status`, `duration_ms`, `size`). Set `PLAIN_SERVER_ACCESS_LOG_FIELDS` in your environment to override.
119+
91120
### Per-response access log control
92121

93122
Individual responses can opt out of the access log by setting `log_access = False` on the response object. This is useful for noisy endpoints like health checks or asset serving.

plain/plain/server/glogging.py

Lines changed: 53 additions & 103 deletions
Original file line numberDiff line numberDiff line change
@@ -6,44 +6,23 @@
66
# See the LICENSE for more information.
77
#
88
# Vendored and modified for Plain.
9-
import base64
10-
import binascii
119
import datetime
1210
import logging
13-
import os
1411
import sys
15-
import time
1612
import traceback
1713
from typing import Any
1814

15+
from plain.logs.formatters import KeyValueFormatter
16+
1917
# Module-level loggers
2018
log = logging.getLogger("plain.server")
2119
access_log = logging.getLogger("plain.server.access")
2220

23-
# Access log format (Apache Combined Log Format)
24-
ACCESS_LOG_FORMAT = '%(h)s %(l)s %(u)s %(t)s "%(r)s" %(s)s %(b)s "%(f)s" "%(a)s"'
25-
26-
27-
class SafeAtoms(dict[str, Any]):
28-
def __init__(self, atoms: dict[str, Any]) -> None:
29-
dict.__init__(self)
30-
for key, value in atoms.items():
31-
if isinstance(value, str):
32-
self[key] = value.replace('"', '\\"')
33-
else:
34-
self[key] = value
35-
36-
def __getitem__(self, k: str) -> Any:
37-
if k.startswith("{"):
38-
kl = k.lower()
39-
if kl in self:
40-
return super().__getitem__(kl)
41-
else:
42-
return "-"
43-
if k in self:
44-
return super().__getitem__(k)
45-
else:
46-
return "-"
21+
# Maps field names that come from request headers
22+
_HEADER_FIELDS = {
23+
"user_agent": "USER-AGENT",
24+
"referer": "REFERER",
25+
}
4726

4827

4928
def setup_bootstrap_logging(accesslog: bool) -> None:
@@ -70,7 +49,7 @@ def setup_bootstrap_logging(accesslog: bool) -> None:
7049
if accesslog:
7150
_set_handler(
7251
access_log,
73-
logging.Formatter("%(message)s"),
52+
KeyValueFormatter("[%(levelname)s] %(message)s %(keyvalue)s"),
7453
stream=sys.stdout,
7554
)
7655

@@ -91,91 +70,62 @@ def _set_handler(
9170
logger.addHandler(h)
9271

9372

73+
def _get_header(req: Any, header_name: str) -> str:
74+
"""Look up a header value from the request's header list."""
75+
for name, value in req.headers:
76+
if name == header_name:
77+
return value
78+
return ""
79+
80+
9481
def log_access(
9582
resp: Any,
9683
req: Any,
9784
request_time: datetime.timedelta,
9885
) -> None:
9986
"""Log an access entry for a completed request."""
100-
if not access_log.handlers:
87+
if not access_log.handlers or not access_log.isEnabledFor(logging.INFO):
10188
return
10289

103-
safe_atoms = SafeAtoms(_build_atoms(resp, req, request_time))
104-
105-
try:
106-
access_log.info(ACCESS_LOG_FORMAT, safe_atoms)
107-
except Exception:
108-
log.error(traceback.format_exc())
109-
90+
from plain.runtime import settings
11091

111-
def _build_atoms(
112-
resp: Any,
113-
req: Any,
114-
request_time: datetime.timedelta,
115-
) -> dict[str, Any]:
116-
"""Build log atoms from server response and request objects."""
11792
status = resp.status
11893
if isinstance(status, str):
11994
status = status.split(None, 1)[0]
12095

121-
protocol = f"HTTP/{req.version[0]}.{req.version[1]}"
122-
123-
# Get client IP from the server request's peer address
124-
if isinstance(req.peer_addr, tuple):
125-
remote_addr = req.peer_addr[0]
126-
elif isinstance(req.peer_addr, str):
127-
remote_addr = req.peer_addr
128-
else:
129-
remote_addr = "-"
130-
131-
atoms: dict[str, Any] = {}
132-
133-
# Add request headers as {name}i atoms
134-
atoms.update({f"{{{k.lower()}}}i": v for k, v in req.headers})
135-
136-
# Add response headers as {name}o atoms
137-
resp_headers = resp.headers
138-
if hasattr(resp_headers, "items"):
139-
resp_headers = resp_headers.items()
140-
atoms.update({f"{{{k.lower()}}}o": v for k, v in resp_headers})
141-
142-
atoms.update(
143-
{
144-
"h": remote_addr,
145-
"l": "-",
146-
"u": _get_user(atoms) or "-",
147-
"t": time.strftime("[%d/%b/%Y:%H:%M:%S %z]"),
148-
"r": f"{req.method} {req.uri} {protocol}",
149-
"s": status,
150-
"m": req.method,
151-
"U": req.path,
152-
"q": req.query,
153-
"H": protocol,
154-
"b": getattr(resp, "sent", None) is not None and str(resp.sent) or "-",
155-
"B": getattr(resp, "sent", None),
156-
"f": atoms.get("{referer}i", "-"),
157-
"a": atoms.get("{user-agent}i", "-"),
158-
"T": request_time.seconds,
159-
"D": (request_time.seconds * 1000000) + request_time.microseconds,
160-
"M": (request_time.seconds * 1000) + int(request_time.microseconds / 1000),
161-
"L": f"{request_time.seconds}.{request_time.microseconds:06d}",
162-
"p": f"<{os.getpid()}>",
163-
}
164-
)
96+
context: dict[str, Any] = {}
97+
98+
for field in settings.SERVER_ACCESS_LOG_FIELDS:
99+
if field == "method":
100+
context["method"] = req.method
101+
elif field == "path":
102+
context["path"] = req.path
103+
elif field == "status":
104+
context["status"] = int(status)
105+
elif field == "duration_ms":
106+
context["duration_ms"] = int(request_time.total_seconds() * 1000)
107+
elif field == "size":
108+
context["size"] = getattr(resp, "sent", None) or 0
109+
elif field == "ip":
110+
if isinstance(req.peer_addr, tuple):
111+
context["ip"] = req.peer_addr[0]
112+
elif isinstance(req.peer_addr, str):
113+
context["ip"] = req.peer_addr
114+
else:
115+
context["ip"] = ""
116+
elif field == "url":
117+
if req.query:
118+
context["url"] = f"{req.path}?{req.query}"
119+
else:
120+
context["url"] = req.path
121+
elif field == "query":
122+
context["query"] = req.query or ""
123+
elif field == "protocol":
124+
context["protocol"] = f"HTTP/{req.version[0]}.{req.version[1]}"
125+
elif header_name := _HEADER_FIELDS.get(field):
126+
context[field] = _get_header(req, header_name)
165127

166-
return atoms
167-
168-
169-
def _get_user(atoms: dict[str, Any]) -> str | None:
170-
"""Extract username from Basic auth in request headers."""
171-
user = None
172-
http_auth = atoms.get("{authorization}i")
173-
if http_auth and http_auth.lower().startswith("basic"):
174-
auth = http_auth.split(" ", 1)
175-
if len(auth) == 2:
176-
try:
177-
auth = base64.b64decode(auth[1].strip().encode("utf-8"))
178-
user = auth.split(b":", 1)[0].decode("UTF-8")
179-
except (TypeError, binascii.Error, UnicodeDecodeError) as exc:
180-
log.debug("Couldn't get username: %s", exc)
181-
return user
128+
try:
129+
access_log.info("Request", extra={"context": context})
130+
except Exception:
131+
log.error(traceback.format_exc())

0 commit comments

Comments
 (0)