Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions src/paperscout/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""WG21 paper scout: Slack bot, index polling, and isocpp.org probing."""

from importlib.metadata import PackageNotFoundError, version

try:
Expand Down
11 changes: 3 additions & 8 deletions src/paperscout/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,7 @@


def _setup_logging(data_dir: Path, console_level: str = "INFO", retention_days: int = 7) -> None:
"""Configure root logger with:

• Console (stderr) — at *console_level*, for interactive monitoring.
• Rotating file (data_dir/paperscout.log) — at *console_level*, rotated
midnight each day, keeping *retention_days* days of history.

Noisy third-party libraries are silenced to WARNING regardless.
"""
"""Console + daily rotating file logging; third-party loggers capped at WARNING."""
data_dir.mkdir(parents=True, exist_ok=True)

fmt = logging.Formatter(
Expand Down Expand Up @@ -61,6 +54,7 @@ def _setup_logging(data_dir: Path, console_level: str = "INFO", retention_days:


async def _async_main() -> None:
"""Start DB, Slack app, health server, and the polling scheduler."""
data_dir = settings.data_dir
data_dir.mkdir(parents=True, exist_ok=True)

Expand Down Expand Up @@ -135,6 +129,7 @@ def _on_poll_result(result):


def main() -> None:
"""CLI entry: run ``_async_main`` until interrupt."""
try:
asyncio.run(_async_main())
except KeyboardInterrupt:
Expand Down
4 changes: 4 additions & 0 deletions src/paperscout/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Environment-backed runtime configuration (see ``settings`` singleton)."""

from __future__ import annotations

from pathlib import Path
Expand All @@ -7,6 +9,8 @@


class Settings(BaseSettings):
"""Application settings loaded from environment and optional ``.env``."""

model_config = SettingsConfigDict(
env_file=".env",
env_file_encoding="utf-8",
Expand Down
2 changes: 2 additions & 0 deletions src/paperscout/health.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@


class _HealthHandler(BaseHTTPRequestHandler):
"""Serves JSON ``GET /health`` with version, uptime, index and probe stats."""

launch_time: datetime
paper_count_fn: Callable[[], int]
state: object # ProbeState — kept generic to avoid circular import
Expand Down
11 changes: 11 additions & 0 deletions src/paperscout/models.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Domain types for WG21 papers parsed from the wg21.link index."""

from __future__ import annotations

import re
Expand All @@ -6,6 +8,8 @@


class PaperPrefix(str, Enum):
"""Paper ID prefix letters (P/D/N, subgroup codes, etc.)."""

D = "D"
P = "P"
N = "N"
Expand All @@ -19,6 +23,8 @@ class PaperPrefix(str, Enum):


class PaperType(str, Enum):
"""Classification from the wg21.link index ``type`` field."""

PAPER = "paper"
ISSUE = "issue"
EDITORIAL = "editorial"
Expand All @@ -27,6 +33,8 @@ class PaperType(str, Enum):


class FileExt(str, Enum):
"""Published file extension for a paper artifact."""

PDF = ".pdf"
HTML = ".html"

Expand All @@ -38,6 +46,8 @@ class FileExt(str, Enum):

@dataclass(slots=True)
class Paper:
"""One indexed paper: id, metadata, and derived number/prefix/revision."""

id: str
title: str = ""
author: str = ""
Expand Down Expand Up @@ -82,6 +92,7 @@ def revision(self) -> int | None:

@staticmethod
def from_index_entry(key: str, entry: dict) -> Paper:
"""Build a ``Paper`` from a wg21.link index key and value dict."""
return Paper(
id=key,
title=entry.get("title", ""),
Expand Down
23 changes: 11 additions & 12 deletions src/paperscout/monitor.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Polling scheduler: diff index snapshots, run probes, dispatch notifications."""

from __future__ import annotations

import asyncio
Expand All @@ -19,6 +21,8 @@

@dataclass(slots=True)
class DiffResult:
"""New and updated papers between two index snapshots."""

new_papers: list[Paper]
updated_papers: list[Paper]

Expand All @@ -27,6 +31,7 @@ def diff_snapshots(
previous: dict[str, Paper],
current: dict[str, Paper],
) -> DiffResult:
"""Compare two id→paper maps; detect additions and metadata changes."""
new_papers: list[Paper] = []
updated_papers: list[Paper] = []
prev_keys = set(previous.keys())
Expand All @@ -53,11 +58,7 @@ def diff_snapshots(

@dataclass
class PerUserMatches:
"""Watchlist matches for a single Slack user in one poll cycle.

Each entry in *papers* and *probe_hits* is a ``(item, match_reason)``
tuple where ``match_reason`` is ``'author'`` or ``'paper'``.
"""
"""One user's watchlist hits: ``(paper|hit, 'author'|'paper')`` tuples."""

papers: list[tuple[Paper, str]] = field(default_factory=list)
probe_hits: list[tuple[ProbeHit, str]] = field(default_factory=list)
Expand All @@ -68,13 +69,7 @@ class PerUserMatches:

@dataclass(slots=True)
class DPTransition:
"""A D-paper draft that has been formally published as its P counterpart.

*paper* -- the new P-paper entry from the wg21.link index
*draft_url* -- the D-paper URL we originally probed
*last_modified -- server Last-Modified of the draft (Unix timestamp), or None
*discovered_at* -- our wall-clock time when we first found the draft
"""
"""Index P entry that corresponds to a draft URL we previously probed on isocpp."""

paper: Paper
draft_url: str
Expand All @@ -83,6 +78,8 @@ class DPTransition:


class PollResult:
"""Outcome of one poll: index diff, probe hits, D→P transitions, per-user matches."""

def __init__(
self,
diff: DiffResult,
Expand Down Expand Up @@ -147,6 +144,7 @@ async def seed(self) -> None:
)

async def poll_once(self) -> PollResult:
"""Refresh index (if enabled), diff, probe isocpp, compute matches, notify."""
self._poll_count += 1
t0 = time.monotonic()
log.info("POLL-START poll=%d", self._poll_count)
Expand Down Expand Up @@ -272,6 +270,7 @@ async def poll_once(self) -> PollResult:
return result

async def run_forever(self) -> None:
"""Run ``poll_once`` on an interval, with overrun cooldown between cycles."""
interval = self.cfg.poll_interval_minutes * 60
cooldown = self.cfg.poll_overrun_cooldown_seconds
log.info(
Expand Down
33 changes: 26 additions & 7 deletions src/paperscout/scout.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Slack Bolt app: outbound notifications, commands, and message queue."""

from __future__ import annotations

import logging
Expand All @@ -19,6 +21,7 @@


def create_app() -> App:
"""Construct a Slack Bolt ``App`` using configured bot token and signing secret."""
return App(
token=settings.slack_bot_token,
signing_secret=settings.slack_signing_secret,
Expand All @@ -32,12 +35,7 @@ def create_app() -> App:


class MessageQueue:
"""Thread-safe, rate-limited Slack ``chat.postMessage`` queue.

Maintains a 1-message-per-second-per-channel limit and honours the
``Retry-After`` header on HTTP 429 responses. All channel and DM posts
go through this queue so the polling loop is never blocked by Slack I/O.
"""
"""Background queue for Slack posts: per-channel throttle and 429 retry-after."""

def __init__(self, app: App):
self._app = app
Expand All @@ -48,11 +46,13 @@ def __init__(self, app: App):
self._thread: threading.Thread | None = None

def start(self) -> None:
    """Launch the background sender.

    Creates a daemon thread named ``mq-sender`` whose target is
    ``self._run``, records it on ``self._thread``, starts it, and logs
    that the queue is running.
    """
    sender = threading.Thread(name="mq-sender", target=self._run, daemon=True)
    # Keep a handle on the instance before the thread begins executing.
    self._thread = sender
    sender.start()
    log.info("MessageQueue started")

def enqueue(self, channel: str, text: str, **kwargs) -> None:
    """Queue a ``chat.postMessage`` for *channel* (or a user id for DMs).

    The request is stored on the internal queue as a
    ``(channel, text, kwargs)`` tuple; nothing is sent from this method.
    Extra keyword arguments are kept alongside the message — presumably
    forwarded to the Slack call by the sender loop, which is not visible
    here.
    """
    pending = (channel, text, kwargs)
    self._q.put(pending)

def _run(self) -> None:
Expand Down Expand Up @@ -110,18 +110,21 @@ def _send_with_retry(self, channel: str, text: str, kwargs: dict) -> None:


def _paper_link(paper: Paper) -> str:
    """Format *paper* as a Slack mrkdwn link, ``<url|id>``.

    The link target is ``paper.url`` when it is truthy, otherwise
    ``paper.long_link``; if both are empty the target falls back to
    ``https://wg21.link/<paper.id>``. The visible label is always the
    paper id.
    """
    target = paper.url
    if not target:
        target = paper.long_link
    if not target:
        # Neither index field is populated: build the short link from the id.
        target = f"https://wg21.link/{paper.id}"
    return "<{}|{}>".format(target, paper.id)


def _hit_label(hit_url: str, prefix: str, number: int, revision: int, ext: str) -> str:
    """Build a Slack mrkdwn link for a probe hit, labelled with its filename.

    The label is ``<prefix><number>R<revision><ext>`` with *number*
    zero-padded to at least four digits (e.g. ``D0042R1.pdf``); the link
    target is *hit_url*.
    """
    filename = "{}{:04d}R{}{}".format(prefix, number, revision, ext)
    return "<{}|{}>".format(hit_url, filename)


def _fmt_lm(lm: datetime | None) -> str:
"""Short human-readable age string from a Last-Modified time."""
if lm is None:
return "modified: unknown"
now = datetime.now(timezone.utc)
Expand Down Expand Up @@ -244,6 +247,7 @@ def notify_users(app: App, result: PollResult, mq: MessageQueue) -> None:


def _batch_lines(lines: list[str], max_len: int) -> list[str]:
"""Split *lines* into Slack-sized chunks under *max_len* characters."""
batches: list[str] = []
current: list[str] = []
current_len = 0
Expand All @@ -270,6 +274,8 @@ def register_handlers(
paper_count_fn,
launch_time: datetime | None = None,
) -> None:
"""Wire Slack events for mentions, DMs, watchlist, status, version, uptime."""

def _dispatch(text: str, user_id: str, channel_type: str, say, reply_opts: dict) -> None:
words = [w for w in text.split() if w]
if not words:
Expand Down Expand Up @@ -353,7 +359,13 @@ def handle_message(event, context, say):
if bot_id and f"<@{bot_id}>" in text:
text = text.split(f"<@{bot_id}>", 1)[-1].strip()
if text:
_dispatch(text, user_id, channel_type, say=say, reply_opts=_reply_opts(event))
_dispatch(
text,
user_id,
channel_type,
say=say,
reply_opts=_reply_opts(event),
)

else:
# Public/private channels: handled by app_mention; skip plain messages
Expand All @@ -362,6 +374,7 @@ def handle_message(event, context, say):


def _reply_opts(event: dict) -> dict:
"""kwargs for ``say`` including ``thread_ts`` when replying in a thread."""
opts: dict = {"unfurl_links": False, "unfurl_media": False}
thread_ts = event.get("thread_ts")
if thread_ts:
Expand All @@ -376,6 +389,7 @@ def _handle_watchlist(
say,
reply_opts: dict,
) -> None:
"""Parse ``watchlist`` subcommand: add, remove, list, or usage."""
if not args:
_show_watchlist(user_id, user_watchlist, say, reply_opts)
return
Expand Down Expand Up @@ -408,6 +422,7 @@ def _show_watchlist(
say,
reply_opts: dict,
) -> None:
"""Post the user's watchlist entries or an empty-state hint."""
entries = user_watchlist.list_entries(user_id)
if entries:
lines = [f"• {entry} ({etype})" for entry, etype in entries]
Expand All @@ -426,6 +441,7 @@ def _show_watchlist(


def _handle_status(state: ProbeState, paper_count_fn, say, reply_opts: dict) -> None:
"""Post loaded paper count, last poll, probe settings."""
from datetime import datetime as _dt
from datetime import timezone as _tz

Expand All @@ -449,12 +465,14 @@ def _handle_status(state: ProbeState, paper_count_fn, say, reply_opts: dict) ->


def _handle_version(say, reply_opts: dict) -> None:
    """Reply with the package version.

    Calls *say* with the text ``Paperscout v<version>`` and passes every
    entry of *reply_opts* through as keyword arguments.
    """
    # Imported inside the function, as in the original, rather than at
    # module import time.
    from . import __version__ as package_version

    message = f"Paperscout v{package_version}"
    say(text=message, **reply_opts)


def _format_uptime(delta) -> str:
"""Compact ``Nd Nh Nm`` string for a timedelta."""
total_seconds = int(delta.total_seconds())
days, remainder = divmod(total_seconds, 86400)
hours, remainder = divmod(remainder, 3600)
Expand All @@ -469,6 +487,7 @@ def _format_uptime(delta) -> str:


def _handle_uptime(launch_time: datetime | None, say, reply_opts: dict) -> None:
"""Post time since process start (from *launch_time*)."""
if launch_time is None:
say(text="Uptime information is not available.", **reply_opts)
return
Expand Down
Loading
Loading