Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .dockerignore
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,8 @@ __pycache__
*.pyc
*.pyo
data/
tests/
# tests/ must stay in the build context: Dockerfile `test` / `test-builder` targets
# copy them for CI in-container pytest (production stages never COPY tests into the image).
deploy/
coverage.xml
.coverage
Expand Down
37 changes: 20 additions & 17 deletions .env.example
Original file line number Diff line number Diff line change
@@ -1,72 +1,75 @@
# Slack credentials (required)
# --- Required credentials ---
# Slack (required for production; tests set _PAPERSCOUT_TESTING=1 to skip validation)
SLACK_SIGNING_SECRET=your-signing-secret
SLACK_BOT_TOKEN=xoxb-your-bot-token

# Server
# PostgreSQL (required) — shared PostgreSQL on the host.
# When running in Docker, use host.docker.internal to reach the host:
DATABASE_URL=postgresql://paperscout:secret@host.docker.internal:5432/paperscout

# --- Server (optional) ---
PORT=3000
HEALTH_PORT=8080
# Bind address for GET /health (127.0.0.1 = localhost only). Docker Compose sets HEALTH_BIND_HOST=0.0.0.0.
HEALTH_BIND_HOST=127.0.0.1

# Database (required) — shared PostgreSQL on the host.
# When running in Docker, use host.docker.internal to reach the host:
DATABASE_URL=postgresql://paperscout:secret@host.docker.internal:5432/paperscout

# Scheduling
# --- Scheduling (optional) ---
POLL_INTERVAL_MINUTES=30
# Minimum sleep, in seconds, after a poll cycle that overran (took longer than POLL_INTERVAL_MINUTES).
POLL_OVERRUN_COOLDOWN_SECONDS=300
ENABLE_BULK_WG21=true
ENABLE_BULK_OPENSTD=true
ENABLE_ISO_PROBE=true

# Probe prefixes / extensions (used for gap/unknown numbers)
# --- Probe prefixes / extensions (optional) ---
PROBE_PREFIXES=["D","P"]
PROBE_EXTENSIONS=[".pdf",".html"]

# Frontier
# --- Frontier (optional) ---
FRONTIER_WINDOW_ABOVE=60
FRONTIER_WINDOW_BELOW=30
FRONTIER_EXPLICIT_RANGES=[]
# Max gap between consecutive P-numbers before treating a number as an outlier
# (filters pre-assigned far-future numbers like P5000 from the frontier).
FRONTIER_GAP_THRESHOLD=50

# Hot probing (every poll cycle)
# --- Hot probing (optional) ---
# Papers with a date within this many months are probed every poll cycle
# (every 30 min at the default POLL_INTERVAL_MINUTES).
HOT_LOOKBACK_MONTHS=6
# Revisions ahead of the known latest to probe for hot papers.
HOT_REVISION_DEPTH=2

# Cold probing (distributed across cold_cycle_divisor cycles ≈ once per day)
# --- Cold probing (optional) ---
COLD_REVISION_DEPTH=1
# Cold probes are spread across this many poll cycles: 48 × 30 min (default
# POLL_INTERVAL_MINUTES) = 24 h, so every P-number gets probed once within this window.
COLD_CYCLE_DIVISOR=48

# Gap / unknown numbers (no index entry): probe R0 through this revision.
GAP_MAX_REV=1

# Timestamp-based alerting
# --- Timestamp-based alerting (optional) ---
# Only notify for drafts where the server's Last-Modified header is within
# this many hours of now. Falls back to "alert" when the header is absent.
ALERT_MODIFIED_HOURS=24

# HTTP client
# --- HTTP client (optional) ---
HTTP_CONCURRENCY=20
HTTP_TIMEOUT_SECONDS=10
HTTP_USE_HTTP2=true

# Notifications
# --- Notifications (optional) ---
# Slack channel ID for draft notifications; empty = channel posts disabled.
NOTIFICATION_CHANNEL=
# Slack channel ID for ops alerts (stale poll); empty = disabled.
OPS_ALERT_CHANNEL=
NOTIFY_ON_FRONTIER_HIT=true
NOTIFY_ON_ANY_DRAFT=true
NOTIFY_ON_DP_TRANSITION=true

# Storage
# --- Storage (optional) ---
DATA_DIR=./data
CACHE_TTL_HOURS=1

# Logging
# --- Logging (optional) ---
# Log level for both console and file (DEBUG|INFO|WARNING|ERROR).
LOG_LEVEL=INFO
LOG_RETENTION_DAYS=7
38 changes: 34 additions & 4 deletions .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,9 @@ jobs:
name: Lockfile (uv)
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
persist-credentials: false

- name: Install uv
uses: astral-sh/setup-uv@v7
Expand All @@ -31,7 +33,9 @@ jobs:
runs-on: ubuntu-latest
needs: lockfile
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
persist-credentials: false

- name: Install uv
uses: astral-sh/setup-uv@v7
Expand Down Expand Up @@ -61,7 +65,9 @@ jobs:
python-version: ["3.10", "3.11", "3.12"]

steps:
- uses: actions/checkout@v6
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
persist-credentials: false

- name: Install uv
uses: astral-sh/setup-uv@v7
Expand Down Expand Up @@ -98,7 +104,9 @@ jobs:
runs-on: ubuntu-latest
needs: lockfile
steps:
- uses: actions/checkout@v6
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
persist-credentials: false

- name: Install uv
uses: astral-sh/setup-uv@v7
Expand All @@ -111,3 +119,25 @@ jobs:

- name: Run probe benchmark (mock HTTP)
run: uv run pytest benchmarks/ -m benchmark -v --tb=short

docker:
name: Docker image (build + tests in container)
runs-on: ubuntu-latest
needs: lockfile
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6
with:
persist-credentials: false

- name: Build test image
run: docker build --target test -t paperscout:ci .

- name: Run tests in container
run: |
docker run --rm --entrypoint python \
-e _PAPERSCOUT_TESTING=1 \
-e SLACK_BOT_TOKEN=xoxb-ci-placeholder \
-e SLACK_SIGNING_SECRET=ci-placeholder-secret \
-e COVERAGE_FILE=/tmp/.coverage \
paperscout:ci \
-m pytest tests/ -q --cov=paperscout --cov-fail-under=90
35 changes: 35 additions & 0 deletions CONTRIBUTING.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,41 @@ Runtime and dev dependencies are pinned in **`uv.lock`**, generated from [`pypro

**To verify locally before pushing:** `uv lock --check`

### Docker image rebuild

Production images install from [`uv.lock`](uv.lock) via `uv sync --frozen` in the multi-stage [`Dockerfile`](Dockerfile) (not a floating `pip install .`). The base `python:3.12-slim` image is pinned by digest in the Dockerfile.

**After changing dependencies** (`pyproject.toml` / `uv.lock`):

1. Commit the updated lockfile.
2. Rebuild: `docker compose build --no-cache` or `docker build --target production -t paperscout:production .`

**When upgrading the Python base image:**

```bash
docker pull python:3.12-slim
docker inspect --format='{{index .RepoDigests 0}}' python:3.12-slim
```

Update both digest-pinned `FROM python:3.12-slim@sha256:…` lines in the Dockerfile (the `builder` and `runtime` stages) with the new digest, then rebuild.

**Verify tests inside the image** (no live Postgres required):

```bash
docker build --target test -t paperscout:test .
docker run --rm --entrypoint python \
-e _PAPERSCOUT_TESTING=1 \
-e SLACK_BOT_TOKEN=xoxb-test \
-e SLACK_SIGNING_SECRET=test-secret \
-e COVERAGE_FILE=/tmp/.coverage \
paperscout:test \
-m pytest tests/ -q --cov=paperscout --cov-fail-under=90
```

Production deploys use the default image target — the final `production` stage of the Dockerfile (runtime only, no dev dependencies or tests).

See also [deploy/SERVER_SETUP.md](deploy/SERVER_SETUP.md) for operator rebuild steps on the server.

### Tests and coverage

```bash
Expand Down
27 changes: 25 additions & 2 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Reproducible install: dependency graph from uv.lock (--frozen) with bytecode compile.
FROM python:3.12-slim AS builder
# Update python digest when intentionally upgrading the base image (see CONTRIBUTING.md).
FROM python:3.12-slim@sha256:401f6e1a67dad31a1bd78e9ad22d0ee0a3b52154e6bd30e90be696bb6a3d7461 AS builder

RUN apt-get update && apt-get install -y --no-install-recommends \
gcc libpq-dev \
Expand All @@ -14,8 +15,13 @@ COPY src/ src/
ENV UV_COMPILE_BYTECODE=1
RUN uv sync --frozen --no-dev --no-editable

# Dev deps for CI in-container pytest (target: test).
FROM builder AS test-builder
COPY tests/ tests/
Comment thread
coderabbitai[bot] marked this conversation as resolved.
RUN uv sync --frozen --extra dev --no-editable

FROM python:3.12-slim

FROM python:3.12-slim@sha256:401f6e1a67dad31a1bd78e9ad22d0ee0a3b52154e6bd30e90be696bb6a3d7461 AS runtime

RUN apt-get update && apt-get install -y --no-install-recommends \
libpq5 \
Expand All @@ -37,3 +43,20 @@ USER paperscout
EXPOSE 3000 8080

ENTRYPOINT ["python", "-m", "paperscout"]


FROM runtime AS test

USER root
COPY --from=test-builder /build/.venv /app/.venv
COPY --from=test-builder /build/tests /app/tests
# pytest / coverage read [tool.pytest] and [tool.coverage] from pyproject.toml
COPY --from=test-builder /build/pyproject.toml /app/pyproject.toml
RUN chown -R paperscout:paperscout /app
USER paperscout
ENV COVERAGE_FILE=/tmp/.coverage
# CI runs pytest via `docker run … python -m pytest`; do not inherit paperscout ENTRYPOINT.
ENTRYPOINT []

Comment thread
coderabbitai[bot] marked this conversation as resolved.
# Default image for production (must remain the final stage).
FROM runtime AS production
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,6 @@ All parameters are configurable via environment variables or a `.env` file. See
| `POLL_INTERVAL_MINUTES` | `30` | Main polling cycle interval |
| `POLL_OVERRUN_COOLDOWN_SECONDS` | `300` | Minimum sleep after a poll cycle that overran the interval (avoids tight loops when work or errors stretch a cycle) |
| `ENABLE_BULK_WG21` | `true` | Fetch wg21.link/index.json each cycle |
| `ENABLE_BULK_OPENSTD` | `true` | Reserved for open-std.org scraping (not yet scheduled) |
| `ENABLE_ISO_PROBE` | `true` | Run isocpp.org HEAD probing each cycle |

### Probe Prefixes / Extensions
Expand Down
6 changes: 6 additions & 0 deletions deploy/SERVER_SETUP.md
Original file line number Diff line number Diff line change
Expand Up @@ -226,6 +226,12 @@ curl -sf http://localhost:9101/health | python3 -m json.tool
docker compose logs -f paperscout
```

### Rebuilding after updates

Production deploys typically run `git pull` followed by `docker compose up -d --build paperscout` (see CD workflow). Dependencies are frozen in `uv.lock` and installed with `uv sync --frozen` during the image build; the base Python image is digest-pinned in the Dockerfile.

When upgrading dependencies or the base image locally, see **Docker image rebuild** in [CONTRIBUTING.md](../CONTRIBUTING.md).

### Example: staging-style host

If you use a **separate** staging deployment (second clone path and GitHub Environment `staging`), typical placeholders are:
Expand Down
5 changes: 4 additions & 1 deletion docker-compose.yml
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Image built from digest-pinned Dockerfile + frozen uv.lock (see CONTRIBUTING.md).
services:
paperscout:
build: .
build:
context: .
target: production
ports:
- "127.0.0.1:9100:3000"
- "127.0.0.1:9101:8080"
Expand Down
2 changes: 1 addition & 1 deletion docs/handoff.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ Outbound Slack messages go through a **background queue** (see [`scout.py`](../s

## Open TODOs and deferred items

- **`ENABLE_BULK_OPENSTD` / open-std.org** — Code paths exist in [`sources.py`](../src/paperscout/sources.py); bulk open-std scheduling is **not** integrated into the main poll loop yet (README notes “not yet scheduled”).
- **open-std.org bulk fetch** — Code paths exist in [`sources.py`](../src/paperscout/sources.py); bulk open-std scheduling is **not** integrated into the main poll loop yet (no env toggle).
- **Eval / roadmap items** — If your org keeps a separate eval or ticket backlog, link it here; this repo does not ship a frozen “eval” document.

## Related documents
Expand Down
1 change: 0 additions & 1 deletion docs/onboarding.md
Original file line number Diff line number Diff line change
Expand Up @@ -147,7 +147,6 @@ Every key from [`.env.example`](../.env.example) is listed below. Names in `.env
| `POLL_INTERVAL_MINUTES` | `30` | Target wall-clock spacing between poll cycles (see [Scheduling](#scheduling-asyncio-loop) below). |
| `POLL_OVERRUN_COOLDOWN_SECONDS` | `300` | **Minimum** sleep after any cycle that ran longer than one interval — avoids hammering the network if a cycle overruns. |
| `ENABLE_BULK_WG21` | `true` | Fetch and parse wg21.link index each cycle when enabled. |
| `ENABLE_BULK_OPENSTD` | `true` | Reserved for open-std.org bulk fetch (not yet wired into the scheduler). |
| `ENABLE_ISO_PROBE` | `true` | Run isocpp.org HEAD probing each cycle when enabled. |

### Probe prefixes / extensions
Expand Down
11 changes: 0 additions & 11 deletions src/paperscout/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
import asyncio
import logging
import logging.handlers
import os
import sys
import threading
from datetime import datetime, timezone
Expand Down Expand Up @@ -94,16 +93,6 @@ async def _async_main() -> None:
log.error("DATABASE_URL is not set — cannot start")
sys.exit(1)

if os.environ.get("_PAPERSCOUT_TESTING") != "1" and (
not (settings.slack_bot_token or "").strip()
or not (settings.slack_signing_secret or "").strip()
):
log.error(
"Slack is not configured: SLACK_BOT_TOKEN and SLACK_SIGNING_SECRET must be "
"non-empty — cannot start"
)
sys.exit(1)

launch_time = datetime.now(timezone.utc)

pool = init_pool(settings.database_url)
Expand Down
1 change: 0 additions & 1 deletion src/paperscout/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ class Settings(BaseSettings):
# poll_interval_minutes). Acts as a short cooldown before the next cycle.
poll_overrun_cooldown_seconds: int = Field(default=300, ge=1) # 5 min
enable_bulk_wg21: bool = True
enable_bulk_openstd: bool = True
enable_iso_probe: bool = True

# -- Paper prefixes / extensions (globals used for gap/unknown numbers) --
Expand Down
3 changes: 0 additions & 3 deletions src/paperscout/db.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,9 +8,6 @@

log = logging.getLogger(__name__)

# Module-level pool; set by __main__ before anything else runs.
pool: pg_pool.ThreadedConnectionPool | None = None

_DDL = """
CREATE TABLE IF NOT EXISTS paper_cache (
key TEXT PRIMARY KEY,
Expand Down
1 change: 0 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,6 @@ def make_test_settings(**overrides) -> Settings:
poll_interval_minutes=30,
poll_overrun_cooldown_seconds=300,
enable_bulk_wg21=True,
enable_bulk_openstd=True,
enable_iso_probe=True,
probe_prefixes=["D", "P"],
probe_extensions=[".pdf"],
Expand Down