diff --git a/api/projects.py b/api/projects.py index fd607fe..6c56347 100644 --- a/api/projects.py +++ b/api/projects.py @@ -1,7 +1,5 @@ """Project listing endpoints.""" -import traceback - from flask import Blueprint, current_app, jsonify from utils.session_path import get_claude_projects_dir, list_projects, list_sessions, safe_join @@ -79,7 +77,11 @@ def get_project_sessions(project_name): "first_timestamp": meta["first_timestamp"], "last_timestamp": meta["last_timestamp"], }) - except Exception as e: - print(f"[ERROR] Failed to parse {s['id']}: {type(e).__name__}: {e}\n{traceback.format_exc()}") - result.append({**s, "title": "Error parsing session", "error": True, "error_detail": f"{type(e).__name__}: {e}"}) + except Exception: + # Full detail (class, message, traceback) to the server log via + # logger.exception. The per-session card carries only `error: True` + # — the class-name+message string was a leak (issue #25). The + # operator looks at the server log for triage. + current_app.logger.exception("Failed to parse session %s", s["id"]) + result.append({**s, "title": "Error parsing session", "error": True}) return jsonify(result) diff --git a/api/sessions.py b/api/sessions.py index 93ce9a8..5d2d21a 100644 --- a/api/sessions.py +++ b/api/sessions.py @@ -1,7 +1,6 @@ """Session detail and stats endpoints.""" import os -import traceback from flask import Blueprint, current_app, jsonify, abort @@ -39,12 +38,14 @@ def get_session(project_name, session_id): if is_excluded_by_rules(rules, searchable): return jsonify({"error": "Session not found"}), 404 return jsonify(session) - except Exception as e: - tb = traceback.format_exc() - print(f"[ERROR] Failed to parse session {session_id}: {e}\n{tb}") - return jsonify({ - "error": f"Failed to parse session: {type(e).__name__}: {e}", - }), 500 + except Exception: + # Full traceback (class name, message, stack) goes to the server log + # via logger.exception. The HTTP body returns a stable, generic + # message — never the class name or `e` itself, which would leak + # internal field names, file paths, and user values to any client + # (issue #25). + current_app.logger.exception("Failed to parse session %s", session_id) + return jsonify({"error": "Failed to parse session"}), 500 @sessions_bp.route("/api/sessions///stats") @@ -62,9 +63,8 @@ def get_session_stats(project_name, session_id): session = parse_session(filepath) stats = compute_stats(session) return jsonify(stats) - except Exception as e: - tb = traceback.format_exc() - print(f"[ERROR] Failed to compute stats for {session_id}: {e}\n{tb}") - return jsonify({ - "error": f"Failed to compute stats: {type(e).__name__}: {e}", - }), 500 + except Exception: + # Same pattern as get_session above — full detail to the server log, + # generic message in the HTTP body (issue #25). + current_app.logger.exception("Failed to compute stats for %s", session_id) + return jsonify({"error": "Failed to compute session stats"}), 500 diff --git a/tests/test_error_propagation.py b/tests/test_error_propagation.py new file mode 100644 index 0000000..f2eaac4 --- /dev/null +++ b/tests/test_error_propagation.py @@ -0,0 +1,228 @@ +""" +Regression tests for issue #25 — HTTP error responses must not leak +exception class names or message internals. + +Three endpoints previously interpolated `f"{type(e).__name__}: {e}"` into +their JSON error body: + +- GET /api/sessions// (api/sessions.py) +- GET /api/sessions///stats (api/sessions.py) +- GET /api/projects//sessions (api/projects.py — per-session card error_detail) + +This file exercises each via Flask test_client with a payload that triggers +the failure path, asserts a 500 (or 200 for projects, since the per-session +error is per-row), and verifies the response body contains no exception +class names from a defensive blocklist. + +Run: + pytest tests/test_error_propagation.py -v +""" + +from __future__ import annotations + +import json +import sys +from pathlib import Path + +import pytest + +REPO_ROOT = Path(__file__).resolve().parent.parent +sys.path.insert(0, str(REPO_ROOT)) + +from flask import Flask # noqa: E402 + +from api.projects import projects_bp # noqa: E402 +from api.sessions import sessions_bp # noqa: E402 + + +# Defensive blocklist — any of these substrings appearing in a response body +# would mean the leak regressed. Includes common Python builtin exception +# class names plus internal-looking shapes. +_LEAK_TOKENS = [ + "Exception", + "Error", + "KeyError", + "ValueError", + "JSONDecodeError", + "OSError", + "FileNotFoundError", + "TypeError", + "AttributeError", + "Traceback", + "//.jsonl.""" + proj = tmp_path / project + proj.mkdir(exist_ok=True) + p = proj / f"{session_id}.jsonl" + p.write_text(content, encoding="utf-8") + return p + + +# --------------------------------------------------------------------------- +# /api/sessions// +# --------------------------------------------------------------------------- + +class TestGetSessionErrorBody: + + def test_500_on_parse_failure_does_not_leak_class_name(self, tmp_path, client, monkeypatch): + # Force the parser to raise an exception with a class-name + message + # that WOULD leak through the old f-string interpolation if the fix + # regressed. (parse_session is normally tolerant — it swallows per-line + # JSONDecodeError — so we monkeypatch to guarantee we hit the except.) + _write_session(tmp_path, "proj", "abc", "{}") + + def _boom(*args, **kwargs): + raise KeyError("internal_secret_field_id") + + monkeypatch.setattr("api.sessions.parse_session", _boom) + + resp = client.get("/api/sessions/proj/abc") + assert resp.status_code == 500 + body = resp.get_json() + assert isinstance(body, dict) + assert body.get("error") == "Failed to parse session" + # The exception's args include "internal_secret_field_id" — must not + # appear in the response body. + assert "internal_secret_field_id" not in json.dumps(body) + _assert_no_class_name_leak(json.dumps(body)) + + def test_404_on_missing_file_keeps_session_id_safe(self, tmp_path, client): + # Session ID is part of the URL so it appears in the 404 message — + # that's fine; what we're guarding is exception-class leakage, which + # 404 doesn't go through. + resp = client.get("/api/sessions/proj/nope-doesnt-exist") + assert resp.status_code == 404 + body = resp.get_json() + _assert_no_class_name_leak(json.dumps(body)) + + def test_400_on_path_traversal_attempt(self, client): + # safe_join rejects this with ValueError; the 400 path returns a + # generic "Invalid path" message and should not leak. + resp = client.get("/api/sessions/..%2Fevil/abc") + assert resp.status_code in (400, 404) + body = resp.get_json() + _assert_no_class_name_leak(json.dumps(body)) + + +# --------------------------------------------------------------------------- +# /api/sessions///stats +# --------------------------------------------------------------------------- + +class TestGetSessionStatsErrorBody: + + def test_500_on_parse_failure_does_not_leak_class_name(self, tmp_path, client, monkeypatch): + _write_session(tmp_path, "proj", "abc", "{}") + + def _boom(*args, **kwargs): + raise ValueError("invalid literal: '/private/path/secret.json'") + + monkeypatch.setattr("api.sessions.parse_session", _boom) + + resp = client.get("/api/sessions/proj/abc/stats") + assert resp.status_code == 500 + body = resp.get_json() + assert body.get("error") == "Failed to compute session stats" + # The exception value contains a fake-secret path — must not leak. + assert "/private/path" not in json.dumps(body) + _assert_no_class_name_leak(json.dumps(body)) + + +# --------------------------------------------------------------------------- +# /api/projects (per-session card) +# --------------------------------------------------------------------------- + +class TestGetProjectsErrorCard: + + def test_per_session_error_card_omits_error_detail(self, tmp_path, client, monkeypatch): + # parse_session is tolerant of malformed lines, so to exercise the + # except branch deterministically (the one that builds the error + # card), monkeypatch it to raise — same pattern as the session-level + # tests above. + _write_session(tmp_path, "myproj", "deadbeef-aaaa-bbbb-cccc-000000000000", "{}") + + def _boom(*args, **kwargs): + raise KeyError("internal_secret_field_id") + + # api/projects.py imports parse_session inside the handler body, + # so patch the source module rather than the consumer. + monkeypatch.setattr("utils.jsonl_parser.parse_session", _boom) + + resp = client.get("/api/projects/myproj/sessions") + # Pin the response shape so a future wrapper change (e.g. {"sessions": [...]}) + # doesn't silently turn this test green by skipping the per-row scan. + assert resp.status_code == 200 + body = resp.get_json() + assert isinstance(body, list), ( + f"Expected JSON array of session cards; got {type(body).__name__}" + ) + _assert_no_class_name_leak(json.dumps(body)) + error_rows = [r for r in body if isinstance(r, dict) and r.get("error")] + assert error_rows, "Expected at least one per-session error card from the forced parse failure" + for row in error_rows: + assert "error_detail" not in row, ( + "Per-session error card still includes error_detail (issue #25)" + ) + # The exception's args include "internal_secret_field_id" — must not + # appear anywhere in the response. + assert "internal_secret_field_id" not in json.dumps(body) + + +# --------------------------------------------------------------------------- +# Source-level guard +# --------------------------------------------------------------------------- + +class TestNoExceptionInterpolationInSource: + """Static guard: any future PR that re-introduces the + `f"...{type(e).__name__}: {e}..."` pattern in api/ fails this test.""" + + def test_api_files_dont_interpolate_exception_in_jsonify(self): + api_dir = REPO_ROOT / "api" + for py_file in api_dir.glob("*.py"): + src = py_file.read_text(encoding="utf-8") + # Look for the specific footgun: jsonify(...) with f-string that + # contains both `type(e)` or `{e}` AND the word "error". + offending_patterns = [ + "type(e).__name__", # the class-name expose + "{e}\"", # bare {e} ending an f-string + "{e},", # bare {e} in a dict-value f-string + ] + for pat in offending_patterns: + assert pat not in src, ( + f"{py_file.name} contains forbidden pattern {pat!r} " + f"— see issue #25" + )