diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..fbf67ec --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,126 @@ +# Contributing + +Thanks for considering a patch. This repo is a small Flask app plus a hash-routed SPA and a CLI export script. Keep changes focused and tested. + +## Development setup + +### Prerequisites + +- **Python 3.12** (matches CI) +- **Node 20+** (only if you change `static/js/` or run frontend unit tests) + +### Bootstrap (Windows PowerShell) + +```powershell +git clone https://github.com/cppalliance/claude-code-chat-browser.git +cd claude-code-chat-browser +python -m venv .venv +.\.venv\Scripts\Activate.ps1 +pip install -r requirements-dev.txt +``` + +### Bootstrap (macOS / Linux) + +```bash +git clone https://github.com/cppalliance/claude-code-chat-browser.git +cd claude-code-chat-browser +python -m venv .venv +source .venv/bin/activate +pip install -r requirements-dev.txt +``` + +### Run the dev server + +```bash +python app.py --port 5000 +# Open http://127.0.0.1:5000 +``` + +Useful flags: + +- `--base-dir PATH` — point at a different `projects/` tree (for tests or fixtures) +- `--exclude-rules PATH` — session exclusion rules file +- `--host 0.0.0.0` — listen on all interfaces (use only on trusted networks) + +## Running tests + +### Python + +```bash +pytest -q # full suite + coverage (see pyproject.toml) +pytest tests/test_api_integration.py -v +pytest tests/test_search.py -v +pytest tests/test_api_routes.py -v +pytest tests/test_error_codes.py -v +``` + +### JavaScript (vitest) + +Only needed when editing `static/js/`: + +```bash +npm ci +npm test +npm run test:coverage # optional +``` + +`node_modules/` is gitignored — run `npm ci` after clone. + +## Code style and conventions + +| Area | Convention | +|------|------------| +| **API errors** | Use `error_response()` from [`api/error_codes.py`](api/error_codes.py). Do not call `jsonify({"error": ...})` without a `code` field. 
Add new members to `ErrorCode` and a row in `tests/test_error_codes.py`. | +| **Exception leakage** | `5xx` bodies are generic messages only. Log full tracebacks with `current_app.logger.exception(...)`. Never put `str(e)` or class names in HTTP JSON (issue #25). | +| **Path safety** | Use `safe_join()` from `utils/session_path.py` for any path built from URL segments. | +| **Imports** | stdlib → third-party → local, blank line between groups. | +| **Line length** | ~100 characters; no enforced formatter yet. | + +## Tests required for common changes + +| Change | Add or update | +|--------|----------------| +| New HTTP route | Happy + error path in `tests/test_api_routes.py` or `tests/test_api_integration.py` | +| New `ErrorCode` | Parametrized row in `tests/test_error_codes.py` | +| Search / limit validation | `tests/test_search.py` | +| New `_parse_tool_result` dispatch entry | Fixture + assertion in `tests/test_jsonl_parser.py` | +| CLI behavior | `tests/test_cli_e2e.py` (subprocess) or `tests/test_cli_args.py` (parser only) | +| Frontend shared module | `static/js/shared/*.test.js` (vitest) | +| Error response shape | `tests/test_error_propagation.py` regression | + +## Branching and pull requests + +- Default branch: **`master`**. Do not push directly to `master`. +- Branch names: `feat/`, `fix/`, `test/`, `chore/`, `docs/`. +- One logical change per PR when possible. 
+- PR checklist: + - [ ] `pytest -q` green locally + - [ ] `npm test` green if JS changed + - [ ] CI jobs green (`pytest`, `integration-tests`, `js-tests`, `prod-install-smoke`) + - [ ] PR description includes a **Test plan** section + - [ ] API changes update [`docs/api-reference.md`](docs/api-reference.md) if behavior or errors change + +## Where things live + +| Task | Location | +|------|----------| +| Add HTTP route | `api/<module>.py`, register blueprint in [`app.py`](app.py) | +| Add stable error code | [`api/error_codes.py`](api/error_codes.py) | +| Parse JSONL / tool results | [`utils/jsonl_parser.py`](utils/jsonl_parser.py) — see [dispatch table notes](docs/architecture.md#dispatch-table) | +| Project/session discovery | [`utils/session_path.py`](utils/session_path.py) | +| Session statistics | [`utils/session_stats.py`](utils/session_stats.py) | +| Bulk / per-session export | [`api/export_api.py`](api/export_api.py), [`utils/md_exporter.py`](utils/md_exporter.py) | +| Export state on disk | [`utils/export_state_store.py`](utils/export_state_store.py) | +| Exclusion rules | [`utils/exclusion_rules.py`](utils/exclusion_rules.py) | +| CLI export | [`scripts/export.py`](scripts/export.py) | +| SPA shell + routing | [`static/index.html`](static/index.html), [`static/js/app.js`](static/js/app.js) | +| Shared frontend utilities | [`static/js/shared/`](static/js/shared/) | +| API documentation | [`docs/api-reference.md`](docs/api-reference.md) | + +## Architecture + +See [`docs/architecture.md`](docs/architecture.md) for data flow, export state machine, and component diagram. + +## Getting help + +Open an issue with a clear repro or propose a draft PR early for CI feedback. diff --git a/README.md b/README.md index 444c007..76827d3 100644 --- a/README.md +++ b/README.md @@ -21,20 +21,11 @@ Browse and export Claude Code chat history — Web GUI and CLI.
- **Per-model badges** in session header - **Bulk export** — download all sessions, incremental updates, or latest-day slice as a zip; if there is nothing to export, the API returns **422** with JSON body `{"error": "Nothing to export", "code": "EXPORT_NOTHING_TO_EXPORT", "since": ""}` (the `since` field echoes your request: `"all"`, `"last"`, or `"incremental"`) instead of an empty zip -### API error codes +### API -JSON error responses include a machine-readable `"code"` (stable `UPPER_SNAKE_CASE`) and a human-readable `"error"` message. Common codes: +REST endpoints for projects, sessions, search, and export are documented in **[`docs/api-reference.md`](docs/api-reference.md)**. -| Code | Typical HTTP | Meaning | -|------|--------------|---------| -| `SEARCH_INVALID_LIMIT` | 400 | Query param `limit` is not a positive integer | -| `INVALID_PATH` | 400 | Path traversal or unsafe project/session path | -| `SESSION_NOT_FOUND` | 404 | Session file missing or excluded | -| `INVALID_REQUEST_BODY` | 400 | POST body is not a JSON object | -| `INVALID_SINCE_MODE` | 400 | Bulk export `since` is not `all`, `last`, or `incremental` | -| `EXPORT_NOTHING_TO_EXPORT` | 422 | No sessions matched the export scope | -| `PARSE_ERROR` | 500 | Session file could not be parsed | -| `INTERNAL_ERROR` | 500 | Unexpected failure (e.g. stats computation) | +JSON error responses include a machine-readable `"code"` (stable `UPPER_SNAKE_CASE`) and a human-readable `"error"` message. See the [error code catalog](docs/api-reference.md#error-code-catalog) for the full table. ### CLI Export - Standalone script to export all sessions to Markdown with YAML frontmatter @@ -105,14 +96,20 @@ Reads from `~/.claude/projects/` which contains JSONL session files created by C ## Project Structure -``` +See **[`docs/architecture.md`](docs/architecture.md)** for layered design, data flow, and the dispatch-table ordering rationale. 
+ +```text claude-code-chat-browser/ ├── app.py # Flask entry point (default port 5000) ├── api/ +│ ├── error_codes.py # ErrorCode enum + error_response() helper │ ├── projects.py # Project listing & session counts │ ├── sessions.py # Session parsing & message delivery │ ├── search.py # Full-text search across sessions │ └── export_api.py # Bulk zip and per-session Markdown export +├── docs/ +│ ├── api-reference.md # HTTP API reference (routes, errors, examples) +│ └── architecture.md # Component diagram and data flow ├── utils/ │ ├── session_path.py # OS-aware path detection & project naming │ ├── jsonl_parser.py # JSONL session parser with tool result classification @@ -122,24 +119,28 @@ claude-code-chat-browser/ ├── static/ │ ├── index.html # SPA entry point (Inter font, minimal markup) │ ├── css/style.css # Dark/light theme, responsive, animations -│ └── js/app.js # Hash-based routing, rendering, UI components +│ └── js/ # ES modules (app.js, route handlers, shared/) +├── CONTRIBUTING.md # Dev setup, tests, PR conventions └── tests/ ``` ## Development -To run the test suite, install the dev requirements (Flask + pytest): +See **[`CONTRIBUTING.md`](CONTRIBUTING.md)** for full setup, conventions, and where to change each layer. + +Quick start: ```bash pip install -r requirements-dev.txt pytest +npm ci && npm test # only if you changed static/js/ ``` -`requirements.txt` carries only the runtime dep (Flask); `requirements-dev.txt` pulls it in via `-r` and adds pytest. +`requirements.txt` carries only the runtime dep (Flask); `requirements-dev.txt` pulls it in via `-r` and adds pytest (+ coverage). Frontend tests use vitest (`package.json`). ## Continuous integration -Every push and pull request runs **`pytest`** on **Ubuntu** (Python 3.12) via [`.github/workflows/ci.yml`](.github/workflows/ci.yml). A separate job verifies that `pip install -r requirements.txt` (production-only) is sufficient to import and boot the app. 
+Every push and pull request runs **`pytest`**, **API integration tests**, and **vitest** on **Ubuntu** (Python 3.12, Node 20) via [`.github/workflows/ci.yml`](.github/workflows/ci.yml). A separate job verifies that `pip install -r requirements.txt` (production-only) is sufficient to import and boot the app. ## Exported Markdown Format diff --git a/api/projects.py b/api/projects.py index df3f9c6..94b99d5 100644 --- a/api/projects.py +++ b/api/projects.py @@ -3,6 +3,7 @@ from flask import Blueprint, current_app from api._flask_types import FlaskReturn, json_error, json_response +from api.error_codes import ErrorCode, error_response from models.project import ProjectSessionRowDict, SessionListItemDict from models.session import SessionDict from utils.session_path import get_claude_projects_dir, list_projects, list_sessions, safe_join @@ -76,7 +77,7 @@ def get_project_sessions(project_name: str) -> FlaskReturn: try: project_dir = safe_join(base, project_name) except ValueError: - return json_response([]), 400 + return error_response(ErrorCode.INVALID_PATH, "Invalid path", 400) sessions = list_sessions(project_dir) # Add summary preview for each session from utils.jsonl_parser import parse_session diff --git a/api/sessions.py b/api/sessions.py index 320816d..5b6979f 100644 --- a/api/sessions.py +++ b/api/sessions.py @@ -74,6 +74,13 @@ def get_session_stats(project_name: str, session_id: str) -> FlaskReturn: try: session = parse_session(filepath) + rules = current_app.config.get("EXCLUSION_RULES") or [] + if is_session_excluded(rules, session, project_name): + return error_response( + ErrorCode.SESSION_NOT_FOUND, + "Session not found", + 404, + ) except _PARSE_ERRORS: current_app.logger.exception("Failed to parse session %s", session_id) return error_response( diff --git a/docs/api-reference.md b/docs/api-reference.md new file mode 100644 index 0000000..b14d8ed --- /dev/null +++ b/docs/api-reference.md @@ -0,0 +1,423 @@ +# API Reference + +HTTP API for 
**claude-code-chat-browser**. All `/api/*` routes return JSON unless noted. The bundled SPA at `GET /` is the primary client; these endpoints are also suitable for scripts and integrations on the same machine. + +**Base URL (default):** `http://127.0.0.1:5000` + +**Source of truth for error codes:** [`api/error_codes.py`](../api/error_codes.py) + +--- + +## Authentication + +None. The server binds to `127.0.0.1` by default and reads `~/.claude/projects/` as the local user. Do not expose it on a public network without adding authentication — there is no per-user authorization model. + +--- + +## Error envelope + +Most `/api/*` error responses use this shape: + +```json +{ + "error": "Human-readable message", + "code": "MACHINE_READABLE_CODE" +} +``` + +All documented error paths below use the structured envelope. + +Extra fields may appear for specific codes (for example `since` on `INVALID_SINCE_MODE` and `EXPORT_NOTHING_TO_EXPORT`). + +| Field | Stability | Notes | +|-------|-----------|-------| +| `code` | Stable | `UPPER_SNAKE_CASE` string from `ErrorCode` enum | +| `error` | May be reworded | Kept for SPA compatibility | +| HTTP status | Stable per code | Use `code` + status together | + +### Error code catalog + +| `code` | HTTP | Routes | Meaning | +|--------|------|--------|---------| +| `SEARCH_INVALID_LIMIT` | 400 | `GET /api/search` | Query param `limit` is not a positive integer | +| `INVALID_PATH` | 400 | Project sessions, session, stats, export session | Path traversal or rejected URL segment | +| `SESSION_NOT_FOUND` | 404 | Session, stats, export session | File missing on disk or session excluded by rules | +| `INVALID_REQUEST_BODY` | 400 | `POST /api/export` | Body is not a JSON object | +| `INVALID_SINCE_MODE` | 400 | `POST /api/export` | `since` is not `all`, `last`, or `incremental` | +| `PARSE_ERROR` | 500 | Session, stats, export session | JSONL file could not be parsed | +| `EXPORT_NOTHING_TO_EXPORT` | 422 | `POST /api/export` | No sessions matched the requested slice | +|
`INTERNAL_ERROR` | 500 | `GET .../stats`, export session | Unexpected failure after parse (e.g. stats computation) | + +--- + +## Exception-leakage policy + +`5xx` responses never include exception class names, tracebacks, or file paths. The body is always the generic message documented per route. Full exceptions are logged server-side via `logger.exception`. See [`tests/test_error_propagation.py`](../tests/test_error_propagation.py) (issue #25). + +--- + +## Exclusion rules + +Sessions can be filtered by an exclusion rules file (default `~/.claude-code-chat-browser/exclusion-rules.txt`, overridable with `python app.py --exclude-rules PATH`). Excluded sessions: + +- Are omitted from `GET /api/projects/<project_name>/sessions` and search results +- Return `404` with `SESSION_NOT_FOUND` on detail, stats, and per-session export routes + +Grammar and matching: [`utils/exclusion_rules.py`](../utils/exclusion_rules.py). + +--- + +## Endpoints + +### `GET /` + +**Source:** [`app.py`](../app.py) + +Serves the single-page application shell (`static/index.html`). Hash-based client routing handles all UI navigation. + +| | | +|--|--| +| **Response** | `200` — `text/html` | +| **Errors** | None | + +```bash +curl -s http://127.0.0.1:5000/ -o /dev/null -w "%{http_code}\n" +``` + +--- + +### `GET /api/projects` + +**Source:** [`api/projects.py`](../api/projects.py) + +Lists every project directory under the Claude projects root that contains at least one `.jsonl` session file. Counts and `last_modified` reflect **titled** sessions only (via `quick_session_info` peek, not full parse). + +#### Query parameters + +None. + +#### Response — `200 OK` + +`application/json` — array of project objects: + +| Field | Type | Description | +|-------|------|-------------| +| `name` | string | Directory name under `~/.claude/projects/` (e.g.
`F--boost-capy`) | +| `path` | string | Absolute path to project directory | +| `display_name` | string | Friendly name derived from session `cwd` when available | +| `session_count` | integer | Count of titled sessions (updated in handler) | +| `last_modified` | string (ISO 8601) | Latest message timestamp across titled sessions | + +```json +[ + { + "name": "F--boost-capy", + "path": "/home/user/.claude/projects/F--boost-capy", + "display_name": "Boost-capy", + "session_count": 12, + "last_modified": "2026-05-20T22:14:03+00:00" + } +] +``` + +Empty projects root → `[]`. + +#### Errors + +None. + +```bash +curl -s http://127.0.0.1:5000/api/projects | jq '.[0]' +``` + +--- + +### `GET /api/projects/<project_name>/sessions` + +**Source:** [`api/projects.py`](../api/projects.py) + +Lists sessions in one project with summary fields for the workspace sidebar. Skips untitled sessions and sessions matched by exclusion rules. + +#### Path parameters + +| Name | Type | Description | +|------|------|-------------| +| `project_name` | string | Project directory name; must not contain `..` | + +#### Response — `200 OK` + +`application/json` — array of session row objects: + +| Field | Type | Description | +|-------|------|-------------| +| `id` | string | Session id (filename without `.jsonl`) | +| `path` | string | Absolute path to JSONL file | +| `size_bytes` | integer | File size | +| `modified` | number | File mtime (epoch seconds) | +| `title` | string | Parsed session title | +| `models` | string[] | Models used in session | +| `tokens` | integer | Sum of input + output tokens | +| `tool_calls` | integer | Total tool calls | +| `first_timestamp` | string \| null | First message timestamp | +| `last_timestamp` | string \| null | Last message timestamp | +| `error` | boolean | Optional; `true` if parse failed (card shows error state) | + +#### Errors + +| Status | `code` | When | +|--------|--------|------| +| 400 | `INVALID_PATH` | Invalid `project_name` (path escape) | + +```bash
+curl -s "http://127.0.0.1:5000/api/projects/F--boost-capy/sessions" | jq '.[0]' +``` + +--- + +### `GET /api/sessions/<project_name>/<session_id>` + +**Source:** [`api/sessions.py`](../api/sessions.py) + +Returns the full parsed session: title, metadata, and messages (including tool calls and thinking blocks). + +#### Path parameters + +| Name | Type | Description | +|------|------|-------------| +| `project_name` | string | Project directory name | +| `session_id` | string | JSONL basename without `.jsonl` extension | + +#### Response — `200 OK` + +`application/json` — session object: + +| Top-level field | Type | Description | +|-----------------|------|-------------| +| `session_id` | string | Session identifier | +| `title` | string | Inferred title from first human message | +| `messages` | array | Ordered message objects (`role`, `text`/`content`, tool fields, etc.) | +| `metadata` | object | Tokens, models, timestamps, file activity, tool counts, `cwd`, `git_branch`, … | + +See [`utils/jsonl_parser.py`](../utils/jsonl_parser.py) `parse_session()` for the full metadata shape. + +#### Errors + +| Status | `code` | When | +|--------|--------|------| +| 400 | `INVALID_PATH` | Path traversal in URL | +| 404 | `SESSION_NOT_FOUND` | File missing or session excluded | +| 500 | `PARSE_ERROR` | Malformed JSONL | + +```bash +curl -s "http://127.0.0.1:5000/api/sessions/F--boost-capy/session_abc123" | jq '.title' +``` + +--- + +### `GET /api/sessions/<project_name>/<session_id>/stats` + +**Source:** [`api/sessions.py`](../api/sessions.py) + +Computed aggregates for one session without returning the message list. + +#### Path parameters + +Same as session detail.
+ +#### Response — `200 OK` + +`application/json` — stats object from [`utils/session_stats.py`](../utils/session_stats.py) `compute_stats()`: + +| Field | Type | Description | +|-------|------|-------------| +| `files_touched` | object | `read`, `written`, `created`, `total_unique` file lists | +| `commands_run` | array | Bash commands with exit metadata | +| `urls_accessed` | string[] | Web fetch URLs | +| `conversation_turns` | integer | Human/assistant turn count | +| `wall_clock_seconds` | number \| null | Session duration | +| `wall_clock_display` | string \| null | Human-readable duration | +| `cost_estimate_usd` | number | Best-effort USD estimate from token usage | +| `tool_result_summary` | object | Aggregated tool result stats | +| `stop_reason_summary` | object | Stop reason counts | +| `entry_type_counts` | object | JSONL entry type counts | +| `sidechain_message_count` | integer | Sidechain entries | +| `api_error_count` | integer | API errors in session | +| `compaction_events` | array | Context compaction markers | + +#### Errors + +| Status | `code` | When | +|--------|--------|------| +| 400 | `INVALID_PATH` | Path traversal | +| 404 | `SESSION_NOT_FOUND` | File missing or session excluded | +| 500 | `PARSE_ERROR` | JSONL malformed | +| 500 | `INTERNAL_ERROR` | `compute_stats` failed after successful parse | + +```bash +curl -s "http://127.0.0.1:5000/api/sessions/F--boost-capy/session_abc123/stats" | jq '.cost_estimate_usd' +``` + +--- + +### `GET /api/search` + +**Source:** [`api/search.py`](../api/search.py) + +Case-insensitive substring search across the messages of all non-excluded sessions in all projects. Linear scan — suitable for local history size, not indexed search.
+ +#### Query parameters + +| Name | Type | Default | Description | +|------|------|---------|-------------| +| `q` | string | `""` | Search string; whitespace stripped; empty → `[]` | +| `limit` | integer | `50` | Max results; must be ≥ 1; **capped at 500** | + +#### Response — `200 OK` + +`application/json` — array of hit objects: + +| Field | Type | Description | +|-------|------|-------------| +| `project` | string | Project `name` | +| `session_id` | string | Session id | +| `title` | string | Session title | +| `role` | string | Message role (`human`, `assistant`, …) | +| `timestamp` | string \| null | Message timestamp | +| `snippet` | string | ~160 chars around match | + +#### Errors + +| Status | `code` | When | +|--------|--------|------| +| 400 | `SEARCH_INVALID_LIMIT` | `limit` not a positive integer (e.g. `abc`, `0`, `1.5`) | + +```bash +curl -s "http://127.0.0.1:5000/api/search?q=parser&limit=10" | jq '.[0]' +curl -s "http://127.0.0.1:5000/api/search?q=test&limit=abc" # → 400 +``` + +--- + +### `GET /api/export/state` + +**Source:** [`api/export_api.py`](../api/export_api.py) + +Read-only snapshot of bulk-export state persisted under `~/.claude-code-chat-browser/export-state.json`. + +#### Response — `200 OK` + +| Field | Type | Description | +|-------|------|-------------| +| `last_export_time` | string \| null | ISO timestamp of last completed bulk export | +| `last_export_session_count` | integer | Sessions in last bulk export run | +| `export_count` | integer | **Legacy alias** — same value as `last_export_session_count`; prefer `last_export_session_count` in new integrations (kept for SPA backwards compatibility) | + +```json +{ + "last_export_time": "2026-05-20T18:42:11.123456", + "last_export_session_count": 17, + "export_count": 17 +} +``` + +#### Errors + +None. 
+ +```bash +curl -s http://127.0.0.1:5000/api/export/state | jq +``` + +--- + +### `POST /api/export` + +**Source:** [`api/export_api.py`](../api/export_api.py) + +Bulk-export sessions as a zip of Markdown files (plus `manifest.jsonl`). Updates export state when at least one session is exported. + +#### Request body + +`application/json` + +| Field | Type | Required | Values | +|-------|------|----------|--------| +| `since` | string | no (default `"all"`) | `all` — every non-excluded titled session; `last` — latest UTC activity day; `incremental` — new/changed since last export | + +#### Response — `200 OK` + +`application/zip` with `Content-Disposition: attachment` + +Filename pattern: + +| `since` | Example filename | +|---------|------------------| +| `all` | `claude-code-export-2026-05-21.zip` | +| `last` | `claude-code-export-last-05-21-2026-05-21.zip` | +| `incremental` | `claude-code-export-incremental-2026-05-21.zip` | + +Zip contains Markdown per session and optional `manifest.jsonl` metadata. + +#### Errors + +| Status | `code` | When | Extra fields | +|--------|--------|------|--------------| +| 400 | `INVALID_REQUEST_BODY` | Body is not a JSON object | — | +| 400 | `INVALID_SINCE_MODE` | Invalid `since` value | `since` echoes rejected value | +| 422 | `EXPORT_NOTHING_TO_EXPORT` | Zero sessions matched | `since` echoes request mode | + +```bash +curl -X POST -H "Content-Type: application/json" \ + -d '{"since":"last"}' \ + -o export.zip \ + http://127.0.0.1:5000/api/export +``` + +--- + +### `GET /api/export/session/<project_name>/<session_id>` + +**Source:** [`api/export_api.py`](../api/export_api.py) + +Download one session as Markdown or JSON. + +#### Path parameters + +Same as session detail.
+ +#### Query parameters + +| Name | Type | Default | Description | +|------|------|---------|-------------| +| `format` | string | `md` | `md` — Markdown attachment; `json` — JSON attachment | + +#### Response — `200 OK` + +| `format` | Content-Type | Disposition | +|----------|--------------|-------------| +| `md` (default) | `text/markdown` | `attachment; filename="<session_id>.md"` | +| `json` | `application/json` | `attachment; filename="<session_id>.json"` | + +JSON body matches the `GET /api/sessions/<project_name>/<session_id>` session object shape. + +#### Errors + +| Status | `code` | When | +|--------|--------|------| +| 400 | `INVALID_PATH` | Path traversal | +| 404 | `SESSION_NOT_FOUND` | Missing or excluded | +| 500 | `PARSE_ERROR` | JSONL malformed | +| 500 | `INTERNAL_ERROR` | Stats/export pipeline failure | + +```bash +curl -OJ "http://127.0.0.1:5000/api/export/session/F--boost-capy/session_abc123" +curl -OJ "http://127.0.0.1:5000/api/export/session/F--boost-capy/session_abc123?format=json" +``` + +--- + +## Related documentation + +- [Architecture overview](architecture.md) +- [Contributing](../CONTRIBUTING.md) +- [README](../README.md) — CLI export and quick start diff --git a/docs/architecture.md b/docs/architecture.md new file mode 100644 index 0000000..a64ff92 --- /dev/null +++ b/docs/architecture.md @@ -0,0 +1,133 @@ +# Architecture + +**claude-code-chat-browser** reads JSONL session files written by Claude Code under `~/.claude/projects/` and serves them through a JSON HTTP API to a single-page web UI, with a parallel CLI export tool. The app is **read-only** toward `~/.claude/` — it never writes session data back to that tree.
+ +## Component diagram + +```text + ┌─────────────────────────────┐ + │ ~/.claude/projects/ │ + │ /*.jsonl │ (read-only data source) + └──────────────┬────────────┘ + │ + ┌─────────────────────────┼─────────────────────────┐ + │ │ │ + ▼ ▼ ▼ +┌─────────────────┐ ┌─────────────────────┐ ┌──────────────────┐ +│ session_path │ │ jsonl_parser │ │ exclusion_rules │ +│ list_projects │ │ parse_session │ │ load + match │ +│ list_sessions │ │ quick_session_info │ └────────┬─────────┘ +│ safe_join │ │ _parse_tool_result │ │ +└────────┬────────┘ └──────────┬──────────┘ │ + │ │ │ + └────────────┬───────────┴────────────────────────┘ + │ + ▼ + ┌────────────────────────────┐ + │ api/ │ + │ projects · sessions │ + │ search · export_api │ + │ error_codes │ + └─────────────┬──────────────┘ + │ Flask blueprints + ▼ + ┌────────────────────────────┐ + │ app.py — create_app() │ + └─────────────┬──────────────┘ + │ + ┌───────────────┴───────────────┐ + ▼ ▼ +┌──────────────────┐ ┌──────────────────┐ +│ static/ │ │ scripts/export.py │ +│ index.html + js │ │ (CLI, uses utils) │ +└──────────────────┘ └──────────────────┘ +``` + +## Layers + +| Layer | Responsibility | Key modules | +|-------|----------------|-------------| +| **Data discovery** | Resolve `~/.claude/projects/`, list projects and sessions, prevent path traversal | `utils/session_path.py` | +| **Parsing** | JSONL → session dict (messages, metadata, tool rendering) | `utils/jsonl_parser.py` | +| **Filtering** | Exclude sensitive sessions via rules file | `utils/exclusion_rules.py` | +| **Statistics** | Aggregates for API and exporters | `utils/session_stats.py` | +| **Export — Markdown** | Session → YAML-frontmatter Markdown | `utils/md_exporter.py` | +| **Export — JSON** | Session → JSON string for download | `utils/json_exporter.py` | +| **Export — state** | Incremental export checkpoints on disk | `utils/export_state_store.py`, `api/export_api.py` | +| **HTTP** | Routes, validation, error envelope | `api/*.py`, 
`api/error_codes.py` | +| **App factory** | Blueprint registration, rules loading, SPA static route | `app.py` | +| **Frontend** | Hash-routed UI, markdown render, shared state | `static/index.html`, `static/js/` | +| **CLI** | Same export semantics as bulk API, no HTTP | `scripts/export.py` | + +## Data flow — typical UI session + +1. Browser loads `GET /` → `static/index.html`. +2. SPA calls `GET /api/projects` → `list_projects()` + `quick_session_info()` for titled counts. +3. User opens a project → `GET /api/projects/<project_name>/sessions` → full `parse_session()` per file, exclusion filter, summary rows. +4. User opens a session → `GET /api/sessions/<project_name>/<session_id>` → full session JSON for the message panel. +5. Optional: `GET /api/sessions/.../stats` for sidebar metrics without returning the message list to the client. +6. Search: `GET /api/search?q=...` scans all projects (brute force). +7. Export: `POST /api/export` or `GET /api/export/session/...` → Markdown/zip via exporters; state file updated on successful bulk export. + +## Dispatch table + +In `utils/jsonl_parser.py`, tool results are classified through `_parse_tool_result`, a **predicate-ordered dispatch table** (not a simple `if tool_name == ...` chain). **Order is load-bearing**: the first matching predicate wins. Tests in `tests/test_jsonl_parser.py` guard ordering regressions. + +When adding a new tool renderer: + +1. Add predicate + builder pair in the dispatch table in the correct order (specific before generic). +2. Add or extend a JSONL fixture under `tests/fixtures/` if needed. +3. Run `pytest tests/test_jsonl_parser.py -v`. + +## Export state machine + +Bulk export (`POST /api/export`) is stateful. State lives in `~/.claude-code-chat-browser/export-state.json` (see `EXPORT_STATE_FILE` in `utils/export_state_store.py`).
+ +| `since` mode | Behavior | +|--------------|----------| +| `all` | Export all eligible sessions; update per-session mtimes in state | +| `last` | Export sessions active on the latest UTC calendar day in history | +| `incremental` | Export only sessions newer than last recorded mtime per id | + +Writes are atomic (temp file + `os.replace`) under a lock from `_state_lock()`. + +If zero sessions match, the API returns **`422`** with `EXPORT_NOTHING_TO_EXPORT` and echoes `since` — not an empty zip. + +`GET /api/export/state` reads the same file without mutating it. + +## Exclusion rules engine + +At startup, `create_app()` loads rules from `--exclude-rules` or the default path into `app.config["EXCLUSION_RULES"]`. `is_session_excluded()` is applied on list, detail, search, and export paths so filtered sessions never appear in the UI or downloads. + +## Frontend + +The UI is a **hash-routed** SPA with ES modules under `static/js/`: + +- `app.js` — routing and boot +- `projects.js`, `sessions.js`, `search.js`, `export.js` — route handlers +- `shared/markdown.js` — markdown + **DOMPurify** sanitization (do not render raw LLM HTML) +- `shared/state.js`, `shared/utils.js`, `shared/theme.js` — shared UI state and helpers + +No bundler step — modern browsers load modules directly. Frontend unit tests use **vitest** + **jsdom** (`npm test`). + +## Continuous integration + +[`.github/workflows/ci.yml`](../.github/workflows/ci.yml) runs on push/PR: + +- `prod-install-smoke` — production `requirements.txt` boots the app +- `pytest` — full Python suite with coverage gate on `api/` + `utils/` +- `integration-tests` — API integration subset + coverage artifact +- `js-tests` — `npm ci` + vitest + +## What this codebase is not + +- **Not multi-user** — no authn/authz; single local operator. +- **Not a writeback tool** — never modifies `~/.claude/`. +- **Not a search engine** — `/api/search` is O(sessions × messages); fine for personal history, not for large multi-tenant indexes. 
+- **Not a versioned public API** — no semver or OpenAPI contract yet; see [`docs/api-reference.md`](api-reference.md) as the human contract. + +## Related documentation + +- [API reference](api-reference.md) +- [Contributing](../CONTRIBUTING.md) +- [README](../README.md) diff --git a/static/js/shared/markdown.test.js b/static/js/shared/markdown.test.js index 3107d30..1b070a2 100644 --- a/static/js/shared/markdown.test.js +++ b/static/js/shared/markdown.test.js @@ -1,4 +1,4 @@ -import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest'; import DOMPurify from 'dompurify'; import { marked } from 'marked'; import { cleanContent, renderMarkdown } from './markdown.js'; @@ -29,14 +29,20 @@ describe('renderMarkdown', () => { }); it('sanitizes script tags from parsed output', () => { + const sanitizeSpy = vi.spyOn(DOMPurify, 'sanitize'); const html = renderMarkdown('# Hello\n\n'); + expect(sanitizeSpy).toHaveBeenCalled(); expect(html).not.toContain(' { + const sanitizeSpy = vi.spyOn(DOMPurify, 'sanitize'); const html = renderMarkdown(''); + expect(sanitizeSpy).toHaveBeenCalled(); expect(html).not.toMatch(/onerror/i); + sanitizeSpy.mockRestore(); }); it('falls back to inline code when marked is unavailable', () => { diff --git a/tests/test_api_routes.py b/tests/test_api_routes.py index 4049e92..e5b9af1 100644 --- a/tests/test_api_routes.py +++ b/tests/test_api_routes.py @@ -61,10 +61,9 @@ def test_search_limit_capped_at_max(client): assert len(results) <= 500 -def test_project_sessions_invalid_path_returns_400_empty_list(client): +def test_project_sessions_invalid_path_returns_invalid_path(client): resp = client.get("/api/projects/../../outside/sessions") - assert resp.status_code == 400 - assert resp.get_json() == [] + assert_error_response(resp, expected_code="INVALID_PATH") def test_export_state_defaults(client_empty): diff --git a/tests/test_search.py b/tests/test_search.py index 
1a7c2ac..52a605e 100644 --- a/tests/test_search.py +++ b/tests/test_search.py @@ -22,7 +22,7 @@ def _assert_search_hits(results: list, *, max_items: int) -> None: assert len(results) <= max_items for item in results: assert isinstance(item, dict) - assert _SEARCH_HIT_KEYS.issubset(item.keys()) + assert set(item.keys()) == _SEARCH_HIT_KEYS def test_limit_integer_string(client_single): @@ -50,9 +50,12 @@ def test_limit_default(client_single): def test_limit_whitespace_defaults(client_single): - resp = client_single.get("/api/search?q=Hello&limit=%20%20%20") - assert resp.status_code == 200 - _assert_search_hits(resp.get_json(), max_items=50) + resp_default = client_single.get("/api/search?q=Hello") + resp_ws = client_single.get("/api/search?q=Hello&limit=%20%20%20") + assert resp_ws.status_code == 200 + assert resp_default.status_code == 200 + _assert_search_hits(resp_ws.get_json(), max_items=50) + assert len(resp_ws.get_json()) == len(resp_default.get_json()) def test_limit_zero(client_single):