diff --git a/CHANGELOG.md b/CHANGELOG.md index e6c0c56ab7a..999fd9bb85e 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,7 +29,9 @@ Both clients hit the **stable 3.x API surface**, so server operators don't need - **Self-update subsystem — Tier 3 (auto with grace window).** - On a git install, set `updates.tier: "auto"` to have new releases applied automatically after `preApplyGraceMinutes`. During the grace window, `/admin/update` shows a live countdown plus Cancel and Apply now buttons. Schedules are persisted to `var/update-state.json`, so an Etherpad restart during the grace window rehydrates the timer instead of losing the schedule. A new release tag detected mid-grace re-arms the timer; if `adminEmail` is set, a one-shot `grace-start` notification fires per scheduled tag (issue #7607). - The terminal `rollback-failed` state continues to disable auto/autonomous attempts globally until acknowledged; manual click stays available because an admin click *is* the intervention the terminal state requires. - - Tier 4 (autonomous in a maintenance window) remains designed but unimplemented and will land in a subsequent release. +- **Self-update subsystem — Tier 4 (autonomous in a maintenance window).** + - Set `updates.tier: "autonomous"` together with `updates.maintenanceWindow: {"start":"HH:MM","end":"HH:MM","tz":"local"|"utc"}` to constrain autonomous updates to a nightly window. The scheduler snaps `scheduledFor` forward to the next window opening when grace would otherwise land outside the window, and defers the fire when the window has already closed by the time the timer callback runs. Cross-midnight windows (`end < start`) are supported; DST transitions are absorbed by the host's wall-clock arithmetic. + - A missing or malformed window degrades the policy to Tier 3 with an explicit `policy.reason` of `maintenance-window-missing` / `maintenance-window-invalid`; an admin banner surfaces the misconfiguration so autonomous behavior is not silently disabled. Closes #7607.
- **Privacy — drop swagger-ui telemetry, document phone-homes, add opt-outs.** - Dropped `swagger-ui-express` because upstream injects a Scarf analytics pixel that cannot be disabled at install or runtime (see [swagger-api/swagger-ui#10573](https://github.com/swagger-api/swagger-ui/issues/10573)). `/api-docs` now serves a vendored copy of [Scalar](https://github.com/scalar/scalar) (MIT) configured with `withDefaultFonts: false` and `telemetry: false` so no outbound calls are made. - New `privacy.updateCheck` (default `true`) — set to `false` to disable the hourly `UpdateCheck.ts` request to `${updateServer}/info.json`. diff --git a/admin/src/components/UpdateBanner.tsx b/admin/src/components/UpdateBanner.tsx index c7013aa3cbe..177035ef817 100644 --- a/admin/src/components/UpdateBanner.tsx +++ b/admin/src/components/UpdateBanner.tsx @@ -52,6 +52,23 @@ export const UpdateBanner = () => { ); } + // Tier 4: tier is autonomous but the maintenance window isn't usable. + // Surface that before the generic "update available" banner so the admin + // knows the autonomous behavior is sitting idle. + const policyReason = updateStatus.policy?.reason; + if (updateStatus.tier === 'autonomous' + && (policyReason === 'maintenance-window-missing' + || policyReason === 'maintenance-window-invalid')) { + return ( +
+ + + {' '} + {t('update.banner.cta')} +
+ ); + } + // Tier 3: scheduled update — show countdown banner instead of the plain // "update available" one. if (updateStatus.execution?.status === 'scheduled') { diff --git a/admin/src/pages/UpdatePage.tsx b/admin/src/pages/UpdatePage.tsx index 78127044a6a..f488ceb8fb7 100644 --- a/admin/src/pages/UpdatePage.tsx +++ b/admin/src/pages/UpdatePage.tsx @@ -192,6 +192,54 @@ export const UpdatePage = () => { values={{tag: scheduled.targetTag, remaining: fmtRemaining(remainingMs)}} />

+ {/* Tier 4: only surface the deferral subtitle when `scheduledFor` + was actually snapped forward to the next window opening. The + backend keeps `scheduledFor = now + grace` whenever that lands + inside the window, so we can't use a fixed time-distance + heuristic (a normal 15-min grace would falsely match). Instead, + compare against `nextWindowOpensAt` with a small tolerance — the + two are computed seconds apart at request time, so an exact-ish + match is the only safe signal that the schedule was deferred. */} + {us.tier === 'autonomous' && us.nextWindowOpensAt + && Math.abs(new Date(scheduled.scheduledFor).getTime() + - new Date(us.nextWindowOpensAt).getTime()) < 60 * 1000 && ( +

+ +

+ )} + + )} + + {us.tier === 'autonomous' && ( +
+

+ {us.maintenanceWindow ? ( + <> +

+ +

+ {us.nextWindowOpensAt && ( +

+ +

+ )} + + ) : ( +

+ )}
)} diff --git a/admin/src/store/store.ts b/admin/src/store/store.ts index d5e7e3d8a3d..5643f9ebebf 100644 --- a/admin/src/store/store.ts +++ b/admin/src/store/store.ts @@ -25,6 +25,12 @@ export type LastResult = null | { at: string; }; +export interface MaintenanceWindow { + start: string; + end: string; + tz: 'local' | 'utc'; +} + export interface UpdateStatusPayload { currentVersion: string; latest: null | { @@ -44,6 +50,9 @@ export interface UpdateStatusPayload { execution: Execution; lastResult: LastResult; lockHeld: boolean; + // Tier 4 additions: + maintenanceWindow: MaintenanceWindow | null; + nextWindowOpensAt: string | null; } type ToastState = { diff --git a/doc/admin/updates.md b/doc/admin/updates.md index c527f20475d..f1a934905c4 100644 --- a/doc/admin/updates.md +++ b/doc/admin/updates.md @@ -5,7 +5,7 @@ Etherpad ships with a built-in update subsystem. - **Tier 1 (notify)** — default. A banner appears in the admin UI when a new release is available, and pad users see a discreet badge if the running version is severely outdated or flagged as vulnerable. No execution. - **Tier 2 (manual click)** — admins on a git install can click "Apply update" at `/admin/update`. Etherpad drains active sessions, runs `git fetch / checkout / pnpm install / pnpm run build:ui`, and exits with code 75 so a process supervisor restarts it on the new version. Auto-rolls back on failure. - **Tier 3 (auto with grace window)** — opt-in. On a git install, a newly detected release transitions execution state to `scheduled` and is applied after `preApplyGraceMinutes`. During the grace window, `/admin/update` shows a live countdown plus Cancel and Apply now buttons; an admin email (if `adminEmail` is set) fires once per scheduled tag. -- **Tier 4 (autonomous in maintenance window)** — designed, not yet implemented. +- **Tier 4 (autonomous in maintenance window)** — opt-in. 
Tier 3 + `updates.maintenanceWindow` is required; the scheduler only fires while the wall clock is inside the configured window. Updates detected outside the window queue for the next opening. ## Settings @@ -192,3 +192,43 @@ The right way to give docker admins an in-product Apply button is to delegate to - **Deploy webhook.** New setting `updates.dockerWebhook`. When set, the Apply button on a docker install POSTs to the configured URL and trusts the orchestrator (Render / Railway / Fly / Portainer / Coolify / GitHub Actions — they all expose redeploy webhooks) to do the actual pull-and-recreate. Direct Docker-socket access (mount `/var/run/docker.sock` into the container) is **out of scope** — anyone who escapes the Etherpad process via that socket gets root on the host. Admins who want fully autonomous docker updates should run [Watchtower](https://containrrr.dev/watchtower/) alongside Etherpad rather than bake equivalent privilege into Etherpad itself. + +## Tier 4 — autonomous in a maintenance window + +Tier 4 layers a wall-clock window on top of Tier 3 so autonomous updates only run while it is safe to drain sessions (typically nightly). + +To enable, on a git install: + +```jsonc +{ + "updates": { + "tier": "autonomous", + "preApplyGraceMinutes": 15, + "maintenanceWindow": { "start": "03:00", "end": "05:00", "tz": "local" } + } +} +``` + +`start` and `end` are 24-hour `HH:MM` wall-clock times in the configured `tz` (`"local"` or `"utc"`). `end` is exclusive; `end < start` denotes a cross-midnight window (`22:00–02:00` runs from 22:00 through 01:59). + +### How the window gate works + +1. `evaluatePolicy` returns `canAutonomous: true` only when the install is `git`, tier is `"autonomous"`, no terminal `rollback-failed` is set, and `updates.maintenanceWindow` is set and parse-valid. 
Missing/malformed windows return `canAutonomous: false` with `policy.reason` equal to `maintenance-window-missing` / `maintenance-window-invalid`, and the rest of the policy degrades to Tier 3 (`canAuto: true`). An admin banner surfaces the misconfiguration so the autonomous behavior is never silently disabled. +2. When the scheduler picks up a new release while `canAutonomous: true`, it computes `scheduledFor = now + preApplyGraceMinutes`. If that timestamp falls **outside** the window, it is snapped forward to the **next opening** of the window. +3. When the timer fires, the scheduler re-checks the clock. If the window has already closed (long grace, clock skew, host suspend), the fire is **deferred**: `var/update-state.json` is updated with a new `scheduledFor` pointing at the next opening, the timer is re-armed, and the actual apply runs at the next valid moment. + +### DST and timezone notes + +- `tz: "utc"` is recommended for hosts running across DST boundaries — the window is interpreted against the same wall clock every day of the year. +- `tz: "local"` follows the host's local time. On DST spring-forward days, a window starting at a non-existent local time (e.g. `02:30` in `America/New_York` on the second Sunday of March) is silently shifted forward past the gap (`02:30` becomes `03:30`) by the host JS `Date` constructor's normalization. On fall-back days, the first occurrence of the wall-clock start time is used. +- Cross-midnight windows (`end < start`) are supported, but a window always spans less than 24 hours (`start` must differ from `end`) and only one window can be configured; if updates should be allowed at any time of day, run Tier 3 instead. + +### Admin UI + +`/admin/update` shows a "Maintenance window" section when `updates.tier == "autonomous"`: + +- Configured: summary `HH:MM–HH:MM (tz)` plus "Next window opens at …". +- Not configured: a clear "Not configured" message and a top-of-page banner that links back to the page.
+- During a deferred-grace schedule, the scheduled panel shows both the countdown to `scheduledFor` and an explanatory "Outside maintenance window. Update will start when the window opens at …" line. + +Admins edit `updates.maintenanceWindow` via the parsed JSONC settings editor at `/admin/settings`. Saving an invalid shape is caught at boot — the warning is logged via the `updater` log4js category and the policy downgrades to Tier 3. diff --git a/docs/superpowers/plans/2026-05-15-auto-update-pr4-tier4-autonomous.md b/docs/superpowers/plans/2026-05-15-auto-update-pr4-tier4-autonomous.md new file mode 100644 index 00000000000..9aac0d9a446 --- /dev/null +++ b/docs/superpowers/plans/2026-05-15-auto-update-pr4-tier4-autonomous.md @@ -0,0 +1,224 @@ +# Auto-Update PR 4 — Tier 4 (autonomous in maintenance window) Implementation Plan + +> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking. + +**Goal:** Land Tier 4 of the auto-update subsystem: when a new release is detected and `updates.tier == "autonomous"` on a writable install with a valid `updates.maintenanceWindow`, schedule the update so that the drain only starts while `now()` is inside the window. Outside the window, the schedule is deferred to the next opening. The admin UI gains a window picker (start/end HH:MM, tz local|utc) with validation and a "next window opens at..." preview. + +**Architecture:** Add a new pure module `MaintenanceWindow.ts` with `inWindow(now, window)` and `nextWindowStart(now, window)`. Both handle cross-midnight (`end < start`), local- vs utc-tz selection, and DST transitions (compute against the configured wall clock, not UTC offsets that shift). 
The `Scheduler.decideSchedule()` and `decideTriggerApply()` decisions take a new `maintenanceWindow` input and a `canAutonomous` policy bit; when the tier is `autonomous`, schedules are placed at `max(now + grace, nextWindowStart)` and trigger-apply aborts (back to `scheduled`) if the window has closed by fire time. `UpdatePolicy.canAutonomous` flips on for `git + tier:autonomous + valid window`. Admin UI adds a picker bound to `updates.maintenanceWindow` via the existing settings round-trip; the UpdatePage scheduled panel shows the resolved next-window time. + +**Tech Stack:** TypeScript (Node ≥ 25), Express, log4js, vitest (unit), mocha + supertest (HTTP integration), Playwright (admin UI), React + Zustand (admin UI). + +--- + +## File structure + +### New files + +- `src/node/updater/MaintenanceWindow.ts` — pure `inWindow(now, window)` + `nextWindowStart(now, window)`. No I/O. +- `src/tests/backend-new/specs/updater/MaintenanceWindow.test.ts` — vitest unit. Same-day, cross-midnight, exact boundary, tz=utc vs tz=local, DST spring-forward + fall-back. +- `src/tests/backend/specs/updater-window-integration.ts` — mocha integration. Latest release detected outside window queues for next opening; entering window triggers fire-now (or grace+window); cancel during deferred-grace returns to idle; window closes mid-grace defers to next window without dropping the schedule. +- `admin/src/components/MaintenanceWindowPicker.tsx` — small controlled component: start (HH:MM), end (HH:MM), tz select, validation message, "next window opens at..." preview. +- `src/tests/frontend-new/admin-spec/update-autonomous.spec.ts` — Playwright: window picker round-trips through Settings; scheduled panel renders "next window opens at..." when waiting; cancel works. + +### Modified files + +- `src/node/updater/types.ts` — add `MaintenanceWindow` type (`{start: string; end: string; tz: 'local' | 'utc'}`), thread `maintenanceWindow: MaintenanceWindow | null` through `PolicyInput`. 
+- `src/node/updater/UpdatePolicy.ts` — `canAutonomous` flips on for `git + tier === 'autonomous'` AND a non-null, schema-valid `maintenanceWindow`. Add new policy `reason` values `'maintenance-window-missing'` (denied tier 4 when window not configured) and `'maintenance-window-invalid'` (denied tier 4 when window fails parse). +- `src/node/updater/Scheduler.ts` — extend `DecideScheduleInput` with `maintenanceWindow` + `canAutonomous`; when canAutonomous, `scheduledFor = now+grace` if that lands inside the window, otherwise `scheduledFor = nextWindowStart(now+grace, window)`. Extend `decideTriggerApply()` so that when canAutonomous and `inWindow(now, window) === false`, return new action `{action: 'defer'; nextStart: string}`. Extend `SchedulerRunner` to re-arm on defer. +- `src/node/updater/index.ts` — pass `updates.maintenanceWindow` + the autonomous bit into `decideSchedule`/`decideTriggerApply`. On `defer`, persist new `scheduledFor` and re-arm. Log line at `info`: `updater: deferred to next maintenance window at `. +- `src/node/utils/Settings.ts` — add `maintenanceWindow: MaintenanceWindow | null` to the `updates` settings type; default `null`. Validate shape on boot; on invalid, log a warning and treat as null (do not crash boot). +- `settings.json.template` + `settings.json.docker` — add `"maintenanceWindow": null` line with comment showing example `{"start":"03:00","end":"05:00","tz":"local"}`. +- `src/node/hooks/express/updateStatus.ts` — surface `nextWindowOpensAt` (the result of `nextWindowStart`, computed at request time when tier is autonomous + window set) in `GET /admin/update/status` response so the admin UI can show "next window opens at...". +- `src/locales/en.json` — `update.window.start`, `update.window.end`, `update.window.tz_local`, `update.window.tz_utc`, `update.window.validation.format`, `update.window.validation.equal`, `update.window.next_opens_at`, `update.page.scheduled.deferred_until`, `update.page.policy.autonomous_no_window`, `update.page.policy.autonomous_invalid_window`.
+- `admin/src/store/store.ts` — extend `Settings.updates` with `maintenanceWindow`; extend response shape returned by `/admin/update/status` with optional `nextWindowOpensAt: string | null`. +- `admin/src/pages/UpdatePage.tsx` — render `MaintenanceWindowPicker` when `tier === 'autonomous'`. Render "Deferred — next window opens at ..." when `execution.status === 'scheduled'` and `scheduledFor > now`. Show explicit `policy.reason` text for `autonomous_no_window` and `autonomous_invalid_window`. +- `admin/src/components/UpdateBanner.tsx` — add a banner variant when `tier === 'autonomous'` but window is missing/invalid: "Autonomous updates are disabled until a maintenance window is configured." Links to `/admin/update`. +- `doc/admin/updates.md` — flip Tier 4 from "designed, not yet implemented" to current; document `maintenanceWindow` shape, cross-midnight, DST behavior, fallback when window is missing. +- `CHANGELOG.md` — Unreleased section entry under `### Added`. +- `docs/superpowers/specs/2026-04-25-auto-update-runbook.md` — append Tier 4 smoke section: configure window 5 min from now, observe deferral, walk window forward, observe fire. + +--- + +## Task 1: Settings schema for `maintenanceWindow` + +**Files:** +- Modify: `src/node/utils/Settings.ts` +- Modify: `settings.json.template` +- Modify: `settings.json.docker` +- Modify: `src/node/updater/types.ts` (export `MaintenanceWindow`) +- Test: extend an existing Settings-load test if one exists for `updates`; otherwise rely on Task 4 unit coverage of the window module + boot-time log. + +**Steps:** +- [ ] In `src/node/updater/types.ts` add `export interface MaintenanceWindow { start: string; end: string; tz: 'local' | 'utc' }`. +- [ ] In `src/node/utils/Settings.ts` extend the `updates` type with `maintenanceWindow: MaintenanceWindow | null`. Default to `null` in the literal. 
+- [ ] Add boot-time validation: regex `/^([01]\d|2[0-3]):[0-5]\d$/` for both `start` and `end`; tz must be `'local' | 'utc'`; `start !== end`. On invalid, log warning via `log4js` category `updater` and set to `null` (do not crash). Validation lives in a small pure helper exported from `MaintenanceWindow.ts` (`parseWindow`) so the policy and the UI can reuse it. +- [ ] Edit `settings.json.template` and `settings.json.docker` to include `"maintenanceWindow": null` immediately below `tier`, with a comment showing the shape. + +**Verification:** +- [ ] `pnpm exec tsc --noEmit` clean. +- [ ] Boot the server with a deliberately malformed window (`{"start":"oops"}`) and confirm the warning is logged and tier downgrades to `auto` effectively (canAutonomous=false via the policy reason `'maintenance-window-invalid'`). + +--- + +## Task 2: `MaintenanceWindow.ts` module + unit tests + +**Files:** +- Create: `src/node/updater/MaintenanceWindow.ts` +- Create: `src/tests/backend-new/specs/updater/MaintenanceWindow.test.ts` + +**Steps:** +- [ ] Export `parseWindow(raw: unknown): MaintenanceWindow | null` (returns `null` if shape/format invalid). +- [ ] Export `inWindow(now: Date, window: MaintenanceWindow): boolean`. Compare against the configured tz's wall clock. For `tz: 'utc'` use `getUTCHours/Minutes`; for `tz: 'local'` use `getHours/Minutes`. Cross-midnight (`end < start`): inside if `now ≥ start || now < end`. +- [ ] Export `nextWindowStart(now: Date, window: MaintenanceWindow): Date`. Returns the next `Date` whose wall-clock time equals `start` in the configured tz and which is ≥ `now`. For `tz: 'local'` this is straightforward; for `tz: 'utc'` build via `Date.UTC`. Document via inline comment that DST spring-forward is handled by the host's `Date` normalization (a non-existent local start time is shifted forward past the gap), so we never schedule "into the gap" because we always compare against wall clock.
+ +**Tests (vitest):** +- [ ] `inWindow` — same-day window 03:00-05:00 (inside at 03:30, outside at 02:59, outside at 05:00 (exclusive end)). +- [ ] `inWindow` — cross-midnight 22:00-02:00 (inside at 23:00 and at 01:00; outside at 02:00 and 21:59). +- [ ] `inWindow` — tz=utc respects UTC clock regardless of host TZ (run with `TZ=America/Los_Angeles`). +- [ ] `nextWindowStart` — when `now` is before today's start, returns today at start. +- [ ] `nextWindowStart` — when `now` is inside the window, returns next day's start (callers gate fire-now via `inWindow`, not `nextWindowStart`). +- [ ] `nextWindowStart` — DST spring forward (America/New_York, 2026-03-08, window 02:30-03:30 local): `nextWindowStart` for `now = 2026-03-08T06:00:00Z` resolves to the next wall-clock 02:30 (which is actually 03:30 local on the DST day; document this in the test). +- [ ] `nextWindowStart` — DST fall back (America/New_York, 2026-11-01, window 01:30-02:30 local): assertion that `nextWindowStart` returns the *first* 01:30 wall-clock occurrence. +- [ ] `parseWindow` — accepts `{start:"03:00",end:"05:00",tz:"local"}`; rejects missing fields, malformed times, `start===end`, unknown tz. + +**Verification:** +- [ ] `pnpm exec vitest run src/tests/backend-new/specs/updater/MaintenanceWindow.test.ts` green. + +--- + +## Task 3: Extend `UpdatePolicy` with `canAutonomous` and window args + +**Files:** +- Modify: `src/node/updater/UpdatePolicy.ts` +- Modify: `src/node/updater/types.ts` (extend `PolicyInput`) +- Modify: `src/tests/backend-new/specs/updater/UpdatePolicy.test.ts` + +**Steps:** +- [ ] Extend `PolicyInput` with `maintenanceWindow: MaintenanceWindow | null` (optional, defaults to null in callers). +- [ ] Modify `evaluatePolicy`: when `tier === 'autonomous'` and writable and not terminal: + - if `maintenanceWindow == null`, `canAutonomous = false`, `reason = 'maintenance-window-missing'`, but keep `canAuto = true`, `canManual = true` (degrade to Tier 3 behavior). 
+ - if `parseWindow(maintenanceWindow) == null`, same as above with `reason = 'maintenance-window-invalid'`. + - otherwise `canAutonomous = true`. +- [ ] Update existing tests that asserted `canAutonomous: true` for `tier: 'autonomous'` without a window — they now expect `canAutonomous: false, reason: 'maintenance-window-missing'`. Add new cases for the three policy outcomes. + +**Verification:** +- [ ] `pnpm exec vitest run src/tests/backend-new/specs/updater/UpdatePolicy.test.ts` green. + +--- + +## Task 4: Scheduler — gate scheduling + firing on window + +**Files:** +- Modify: `src/node/updater/Scheduler.ts` +- Modify: `src/tests/backend-new/specs/updater/Scheduler.test.ts` (extend; create if absent) + +**Steps:** +- [ ] Extend `DecideScheduleInput` with `maintenanceWindow: MaintenanceWindow | null` and use `policy.canAutonomous` to decide whether to apply the window gate. +- [ ] In `decideSchedule`, after the existing grace computation, if `canAutonomous && maintenanceWindow`: + - candidate `scheduledFor = now + grace`. + - if `inWindow(candidate, window) === false`, set `scheduledFor = nextWindowStart(candidate, window)`. + - keep the rest of the email/dedupe machinery untouched (`grace-start` email cadence still fires once per tag). +- [ ] In `decideTriggerApply`, add a parameter for the resolved policy plus the window/now. If `policy.canAutonomous && !inWindow(now, window)`, return new decision `{action: 'defer'; nextStart: string}`. The runner persists `scheduledFor = nextStart` and re-arms. +- [ ] In `SchedulerRunner`, extend the timer-fire callback to call `triggerApply` and, on `defer`, re-arm without firing. (The runner is already idempotent on `arm`.) + +**Tests (vitest):** +- [ ] `decideSchedule` — canAutonomous + window 03:00-05:00 + now=10:00 → `scheduledFor` snapped to the next 03:00 (not `now + grace`). +- [ ] `decideSchedule` — canAutonomous + window 03:00-05:00 + now=03:30 with grace=0 → `scheduledFor` is `now` (inside window, no snap). 
+- [ ] `decideTriggerApply` — canAutonomous + outside window → `{action: 'defer', nextStart: }`. +- [ ] `decideTriggerApply` — canAutonomous + inside window → `{action: 'fire'}`. +- [ ] Email dedupe: defer does not trigger a new `grace-start` email. + +**Verification:** +- [ ] `pnpm exec vitest run src/tests/backend-new/specs/updater/Scheduler.test.ts` green. + +--- + +## Task 5: Wire scheduler runner + status endpoint to surface window state + +**Files:** +- Modify: `src/node/updater/index.ts` +- Modify: `src/node/hooks/express/updateStatus.ts` +- Modify: `src/tests/backend/specs/updater-actions.ts` (or the equivalent status test) — extend to assert `nextWindowOpensAt` is present when tier=autonomous + window set. + +**Steps:** +- [ ] In the periodic check loop, pass `settings.updates.maintenanceWindow` into `decideSchedule`. Pass policy result into both `decideSchedule` and `decideTriggerApply`. +- [ ] On `{action: 'defer'}`, write `state.execution.scheduledFor = nextStart`, persist, `runner.arm(...)`. Emit a log line at INFO category `updater`. +- [ ] In `updateStatus.ts`, when `tier === 'autonomous'` and `maintenanceWindow` parses, compute `nextWindowOpensAt = nextWindowStart(now, window)` and include in the JSON response (`null` otherwise). + +**Verification:** +- [ ] `pnpm exec mocha src/tests/backend/specs/updater-actions.ts` green. + +--- + +## Task 6: Admin UI — `MaintenanceWindowPicker` + scheduled-panel "deferred until" + +**Files:** +- Create: `admin/src/components/MaintenanceWindowPicker.tsx` +- Modify: `admin/src/pages/UpdatePage.tsx` +- Modify: `admin/src/components/UpdateBanner.tsx` +- Modify: `admin/src/store/store.ts` +- Modify: `src/locales/en.json` +- Test: `src/tests/frontend-new/admin-spec/update-autonomous.spec.ts` + +**Steps:** +- [ ] `MaintenanceWindowPicker.tsx` — controlled component over `value: {start, end, tz} | null`, emits `onChange`. 
Inline validation message via i18n keys `update.window.validation.format` / `update.window.validation.equal`. Below the picker, render the resolved `nextWindowOpensAt` (passed in via prop) with key `update.window.next_opens_at`. +- [ ] In `UpdatePage.tsx`, when `settings.updates.tier === 'autonomous'`, render the picker. Wiring through the existing settings round-trip (the parsed settings editor PR #7709 lands first; if it's not yet on develop at integration time, fall back to writing through `/admin/settings`). +- [ ] When `execution.status === 'scheduled'` and `policy.canAutonomous` and `scheduledFor > now`, render the scheduled panel with the deferral subtitle (`update.page.scheduled.deferred_until`). +- [ ] In `UpdateBanner.tsx`, render the "configure maintenance window" banner when `policy.reason === 'maintenance-window-missing' | 'maintenance-window-invalid'` and `tier === 'autonomous'`. +- [ ] Add all i18n keys to `en.json`. **Always i18n, never hardcoded** (memory: `feedback_always_i18n`). + +**Tests (Playwright):** +- [ ] Window picker saves a value; reload restores it. +- [ ] Invalid input shows the validation message and does not save. +- [ ] When tier=autonomous + window set + outside window, the scheduled panel shows "Next window opens at HH:MM (local)". +- [ ] When tier=autonomous + window missing, the banner renders the link to `/admin/update`. + +**Verification:** +- [ ] `pnpm --filter ep_etherpad-lite exec playwright test src/tests/frontend-new/admin-spec/update-autonomous.spec.ts` green (port 9003 per memory `feedback_test_port_9003`). + +--- + +## Task 7: Window-boundary integration test + +**Files:** +- Create: `src/tests/backend/specs/updater-window-integration.ts` + +**Cases:** +- [ ] Outside window: VersionChecker sees a new release; Scheduler arms `scheduledFor = nextWindowStart`; no drain starts. +- [ ] Enter window: clock advances to inside-window; fire-time `decideTriggerApply` returns `fire`; drain starts. 
+- [ ] Cancel during deferred-grace: `/admin/update/cancel` returns 200 and `execution.status` returns to `idle`. +- [ ] Window closes mid-grace: clock advances past `end` before fire; `decideTriggerApply` returns `defer`; state persists with new `scheduledFor`; runner re-arms. + +**Verification:** +- [ ] `pnpm exec mocha src/tests/backend/specs/updater-window-integration.ts` green. + +--- + +## Task 8: Docs, runbook, CHANGELOG + +**Files:** +- Modify: `doc/admin/updates.md` +- Modify: `docs/superpowers/specs/2026-04-25-auto-update-runbook.md` +- Modify: `CHANGELOG.md` + +**Steps:** +- [ ] Flip the Tier 4 section in `doc/admin/updates.md` from "designed, not yet implemented" to current. Document `maintenanceWindow` shape, cross-midnight, DST behavior, and the policy fallback when the window is missing or invalid. +- [ ] Append a Tier 4 smoke section to the runbook: configure window 5 min from now, observe deferral, walk window forward, observe fire, observe rollback path inside window still works. +- [ ] Add an `Unreleased` entry to `CHANGELOG.md` under `### Added`. + +**Verification:** +- [ ] Manual: `pnpm run dev` on a clean checkout with `tier: "autonomous"` + a near-future 2-minute window and confirm the admin UI matches the documented flow. + +--- + +## Cross-cutting checks before opening the PR + +- [ ] `pnpm exec tsc --noEmit` clean (root + admin). +- [ ] `pnpm exec vitest run` green (backend-new). +- [ ] `pnpm exec mocha src/tests/backend/specs/updater-*.ts` green. +- [ ] Playwright admin spec green under `pnpm --filter ep_etherpad-lite exec playwright test src/tests/frontend-new/admin-spec/update-autonomous.spec.ts` on port 9003. +- [ ] `pnpm run build:ui` succeeds. +- [ ] Manual smoke runbook Tier 4 section completed against a disposable VM (canary deferred to merge if the 2-week canary requirement from spec §"Ship gate" is dropped; otherwise gate merge on canary). +- [ ] PR title `feat(updater): tier 4 — autonomous update in maintenance window (#7607)`. 
+- [ ] PR body links to the spec + this plan, lists settings additions, and links to PRs #7601 / #7704 / #7720. +- [ ] After merge, close issue #7607 with a summary comment linking all four PRs. diff --git a/docs/superpowers/specs/2026-04-25-auto-update-runbook.md b/docs/superpowers/specs/2026-04-25-auto-update-runbook.md index 27ca0f578bd..36fb22bdd3e 100644 --- a/docs/superpowers/specs/2026-04-25-auto-update-runbook.md +++ b/docs/superpowers/specs/2026-04-25-auto-update-runbook.md @@ -244,3 +244,49 @@ If any step diverges, capture `var/log/update.log` and stop. Add to the §10 sig - [ ] Apply now during scheduled runs the Tier 2 pipeline immediately. - [ ] Restart-in-grace rehydrates the timer. - [ ] `grace-start` email fires once per tag when `adminEmail` is set. + +## 12. Tier 4 — autonomous in a maintenance window + +Goal: verify the scheduler defers autonomous applies to the configured window, snaps grace forward to the next opening, and surfaces the configuration state in the admin UI. + +### Setup + +Continuing from §11. Settings additions in `settings.json`: + +```jsonc +{ + "updates": { + "tier": "autonomous", + "preApplyGraceMinutes": 1, + "maintenanceWindow": null + } +} +``` + +Restart Etherpad. + +1. **Missing window banner:** visit `/admin/update`. Expect: + - A red/yellow banner at the top: *"Autonomous updates are disabled until a maintenance window is configured."* + - The "Maintenance window" section shows "Not configured." + - `policy.reason` in `GET /admin/update/status` is `maintenance-window-missing`. +2. **Malformed window:** set `"maintenanceWindow": {"start":"oops","end":"05:00","tz":"local"}`. Restart. Expect: + - `journalctl -u etherpad` shows `updater: ignoring malformed updates.maintenanceWindow (...)`. + - The banner now reads *"Autonomous updates are disabled because the maintenance window is malformed."* + - `policy.reason` is `maintenance-window-invalid`. +3. 
**Outside-window deferral:** set the window to **5 minutes in the future**, e.g. at 14:00 local set `{"start":"14:05","end":"14:10","tz":"local"}`. Restart. Force a new release as in §3. Expect: + - The next version check transitions `execution.status` to `scheduled`. + - `scheduledFor` is at the **window start** (14:05), not at `now + 1m`. + - The scheduled panel shows both the countdown *and* an *"Outside maintenance window. Update will start when the window opens at …"* line. +4. **Fire-at-opening:** wait for the window to open. The scheduler should fire and the regular Tier 2 pipeline (drain → executor → exit 75) runs. State ends at `verified`. +5. **Window-closes-mid-grace:** repeat the setup, but configure a window that **closes** before `now + preApplyGraceMinutes`. For example: at 14:00 local set `{"start":"14:01","end":"14:02","tz":"local"}`, `preApplyGraceMinutes: 5`. Force a release. The scheduler arms for 14:01 but at fire time (after the window has closed) `decideTriggerApply` returns `defer`. Expected: + - `journalctl -u etherpad` shows `updater: scheduler deferred ... to next maintenance window at ...`. + - `var/update-state.json` has a *new* `scheduledFor` ~24h ahead. + - No drain, no exit, no apply. + +Add to the §10 sign-off checklist: + +- [ ] Tier 4 missing-window banner renders the localised string. +- [ ] Tier 4 malformed-window banner renders the localised string. +- [ ] Outside-window `scheduledFor` snaps to the next window opening. +- [ ] Scheduled panel shows the "deferred until" line when outside the window. +- [ ] Window-closes-mid-grace cleanly defers without applying. 
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index dd73ab1ba3b..cba3fd55ee5 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -289,6 +289,9 @@ importers: nano: specifier: ^11.0.5 version: 11.0.5 + nodemailer: + specifier: ^8.0.7 + version: 8.0.7 oidc-provider: specifier: 9.8.3 version: 9.8.3 @@ -419,6 +422,9 @@ importers: '@types/node': specifier: ^25.8.0 version: 25.8.0 + '@types/nodemailer': + specifier: ^8.0.0 + version: 8.0.0 '@types/oidc-provider': specifier: ^9.5.0 version: 9.5.0 @@ -1959,6 +1965,9 @@ packages: '@types/node@25.8.0': resolution: {integrity: sha512-TCFSk8IZh+iLX1xtksoBVtdmgL+1IX0fC9BeU4QqFSuNdN/K+HUlhqOzEmSYYpZUVsLYcPqc9KX+60iDuninSQ==} + '@types/nodemailer@8.0.0': + resolution: {integrity: sha512-fyf8jWULsCo0d0BuoQ75i6IeoHs47qcqxWc7yUdUcV0pOZGjUTTOvwdG1PRXUDqN/8A64yQdQdnA2pZgcdi+cA==} + '@types/oidc-provider@9.5.0': resolution: {integrity: sha512-eEzCRVTSqIHD9Bo/qRJ4XQWQ5Z/zBcG+Z2cGJluRsSuWx1RJihqRyPxhIEpMXTwPzHYRTQkVp7hwisQOwzzSAg==} @@ -4450,6 +4459,10 @@ packages: nodeify@1.0.1: resolution: {integrity: sha512-n7C2NyEze8GCo/z73KdbjRsBiLbv6eBn1FxwYKQ23IqGo7pQY3mhQan61Sv7eEDJCiyUjTVrVkXTzJCo1dW7Aw==} + nodemailer@8.0.7: + resolution: {integrity: sha512-pkjE4mkBzQjdJT4/UmlKl3pX0rC9fZmjh7c6C9o7lv66Ac6w9WCnzPzhbPNxwZAzlF4mdq4CSWB5+FbK6FWCow==} + engines: {node: '>=6.0.0'} + object-assign@4.1.1: resolution: {integrity: sha512-rJgTQnkUnH1sFw8yT6VSU3zD3sWmu6sZhIseY8VX+GRu3P6F7Fu+JNDoXfklElbLJSnc3FUQHVe4cU5hj+BcUg==} engines: {node: '>=0.10.0'} @@ -7191,7 +7204,7 @@ snapshots: '@types/accepts@1.3.7': dependencies: - '@types/node': 25.7.0 + '@types/node': 25.8.0 '@types/async@3.2.25': {} @@ -7207,7 +7220,7 @@ snapshots: '@types/connect@3.4.38': dependencies: - '@types/node': 25.7.0 + '@types/node': 25.8.0 '@types/content-disposition@0.5.9': {} @@ -7222,11 +7235,11 @@ snapshots: '@types/connect': 3.4.38 '@types/express': 5.0.6 '@types/keygrip': 1.0.6 - '@types/node': 25.7.0 + '@types/node': 25.8.0 '@types/cors@2.8.19': dependencies: - '@types/node': 
25.7.0 + '@types/node': 25.8.0 '@types/cross-spawn@6.0.6': dependencies: @@ -7356,6 +7369,10 @@ snapshots: dependencies: undici-types: 7.24.6 + '@types/nodemailer@8.0.0': + dependencies: + '@types/node': 25.8.0 + '@types/oidc-provider@9.5.0': dependencies: '@types/keygrip': 1.0.6 @@ -7380,13 +7397,13 @@ snapshots: '@types/readable-stream@4.0.23': dependencies: - '@types/node': 25.7.0 + '@types/node': 25.8.0 '@types/semver@7.7.1': {} '@types/send@1.2.1': dependencies: - '@types/node': 25.7.0 + '@types/node': 25.8.0 '@types/serve-static@2.2.0': dependencies: @@ -10092,6 +10109,8 @@ snapshots: is-promise: 1.0.1 promise: 1.3.0 + nodemailer@8.0.7: {} + object-assign@4.1.1: {} object-inspect@1.13.4: {} diff --git a/settings.json.docker b/settings.json.docker index e9815c6bed1..5da12e7e82b 100644 --- a/settings.json.docker +++ b/settings.json.docker @@ -224,7 +224,8 @@ "rollbackHealthCheckSeconds": 60, "diskSpaceMinMB": 500, "requireSignature": false, - "trustedKeysPath": null + "trustedKeysPath": null, + "maintenanceWindow": null }, /* @@ -233,6 +234,19 @@ */ "adminEmail": null, + /* + * SMTP transport. host=null keeps log-only behaviour; set host+from to send + * real mail via nodemailer (lazy-loaded). Pulls from env vars by default — + * leave the template as-is and provide MAIL_HOST / MAIL_FROM at runtime. + */ + "mail": { + "host": "${MAIL_HOST:null}", + "port": "${MAIL_PORT:587}", + "secure": "${MAIL_SECURE:false}", + "from": "${MAIL_FROM:null}", + "auth": null + }, + /* * Settings for cleanup of pads */ diff --git a/settings.json.template b/settings.json.template index baae8110d0b..2e2c93a5cb5 100644 --- a/settings.json.template +++ b/settings.json.template @@ -239,13 +239,18 @@ * - diskSpaceMinMB: pre-flight refuses to start an update without this much free. * - requireSignature: refuse updates whose tag isn't signed by a trusted key. * - trustedKeysPath: override the keyring location passed to git verify-tag (GNUPGHOME). 
+ * - maintenanceWindow: tier 4 only — nightly window during which the scheduler + * may fire. Null = tier 4 disabled (with tier="autonomous", the policy + * downgrades to canAuto). Shape: {"start":"HH:MM","end":"HH:MM","tz":"local"|"utc"}. + * `end` is exclusive; `end < start` denotes a cross-midnight window. */ "preApplyGraceMinutes": 0, "drainSeconds": 60, "rollbackHealthCheckSeconds": 60, "diskSpaceMinMB": 500, "requireSignature": false, - "trustedKeysPath": null + "trustedKeysPath": null, + "maintenanceWindow": null }, /* @@ -267,6 +272,29 @@ */ "adminEmail": null, + /* + * SMTP transport for outbound admin notifications. host=null keeps the + * legacy log-only behaviour (Notifier still dedupes; nothing leaves the + * box). Set host+from (and optionally auth) to deliver via nodemailer. + * The dependency is lazy-loaded so installs without mail.host pay no + * runtime cost. + * + * "mail": { + * "host": "smtp.example.com", + * "port": 587, + * "secure": false, + * "from": "etherpad@example.com", + * "auth": { "user": "smtp-user", "pass": "smtp-pass" } + * } + */ + "mail": { + "host": null, + "port": 587, + "secure": false, + "from": null, + "auth": null + }, + /* * Settings for cleanup of pads */ diff --git a/src/locales/en.json b/src/locales/en.json index 7d51f13b207..22b26ec8d03 100644 --- a/src/locales/en.json +++ b/src/locales/en.json @@ -168,6 +168,8 @@ "update.page.policy.rollback-failed-terminal": "A previous update failed and could not be rolled back. Press Acknowledge after the install is healthy to clear the lock.", "update.page.policy.up-to-date": "You are running the latest version.", "update.page.policy.tier-off": "Updates are disabled (updates.tier = \"off\").", + "update.page.policy.maintenance-window-missing": "Tier 4 (autonomous) requires a maintenance window. 
Set updates.maintenanceWindow in settings.json to enable autonomous updates.", + "update.page.policy.maintenance-window-invalid": "Tier 4 (autonomous) is disabled because updates.maintenanceWindow is malformed. Expected {start, end, tz} with HH:MM times and tz of \"local\" or \"utc\".", "update.page.last_result.verified": "Last update to {{tag}} verified.", "update.page.last_result.rolled-back": "Last attempted update to {{tag}} rolled back: {{reason}}.", "update.page.last_result.rollback-failed": "Last update attempt failed AND rollback failed: {{reason}}. Manual intervention required.", @@ -186,9 +188,16 @@ "update.execution.rollback-failed": "Rollback failed", "update.banner.terminal.rollback-failed": "An update attempt failed and could not be rolled back. Manual intervention required.", "update.banner.scheduled": "Auto-update to {{tag}} scheduled — applies in {{remaining}}.", + "update.banner.maintenance-window-missing": "Autonomous updates are disabled until a maintenance window is configured.", + "update.banner.maintenance-window-invalid": "Autonomous updates are disabled because the maintenance window is malformed.", "update.page.scheduled.title": "Update scheduled", "update.page.scheduled.countdown": "Etherpad will start updating to {{tag}} in {{remaining}}.", + "update.page.scheduled.deferred_until": "Outside maintenance window. 
Update will start when the window opens at {{at}}.", "update.page.scheduled.apply_now": "Apply now", + "update.window.title": "Maintenance window", + "update.window.summary": "{{start}}–{{end}} ({{tz}})", + "update.window.unset": "Not configured.", + "update.window.next_opens_at": "Next window opens at {{at}}.", "update.drain.t60": "Etherpad will restart in 60 seconds to apply an update.", "update.drain.t30": "Etherpad will restart in 30 seconds to apply an update.", "update.drain.t10": "Etherpad will restart in 10 seconds to apply an update.", diff --git a/src/node/hooks/express/updateActions.ts b/src/node/hooks/express/updateActions.ts index 2962f5afab9..0b4f1a2aea1 100644 --- a/src/node/hooks/express/updateActions.ts +++ b/src/node/hooks/express/updateActions.ts @@ -6,7 +6,7 @@ import {spawn} from 'node:child_process'; import log4js from 'log4js'; import {ArgsExpressType} from '../../types/ArgsExpressType'; import settings, {getEpVersion} from '../../utils/Settings'; -import {getDetectedInstallMethod, stateFilePath, getRollbackDeps} from '../../updater'; +import {getDetectedInstallMethod, stateFilePath, getRollbackDeps, notifyApplyFailure} from '../../updater'; import {evaluatePolicy} from '../../updater/UpdatePolicy'; import {loadState, saveState} from '../../updater/state'; import {acquireLock, releaseLock} from '../../updater/lock'; @@ -104,6 +104,20 @@ const buildPreflightDeps = (installMethod: ReturnType new Promise((resolve) => { + const c = spawn('git', ['show', `${tag}:package.json`], + {cwd: settings.root, stdio: ['ignore', 'pipe', 'ignore']}); + let out = ''; + c.stdout.on('data', (b) => { out += b.toString(); }); + c.on('close', () => { + try { + const pkg = JSON.parse(out); + const range = pkg?.engines?.node; + resolve(typeof range === 'string' && range.trim().length > 0 ? 
range : null); + } catch { resolve(null); } + }); + c.on('error', () => resolve(null)); + }), }); /** @@ -193,6 +207,7 @@ export const expressCreateServer = ( diskSpaceMinMB: Number(settings.updates.diskSpaceMinMB) || 500, requireSignature: settings.updates.requireSignature, trustedKeysPath: settings.updates.trustedKeysPath, + currentNodeVersion: process.versions.node, }, { ...baseDeps, @@ -256,6 +271,20 @@ export const expressCreateServer = ( }); drainer = null; + // Fire the failure-notification email path for outcomes the admin needs + // to know about even on manual apply (an admin might click Apply and + // walk away; rolling back silently isn't enough). Errors here are + // swallowed by notifyApplyFailure — they must not block the response. + if (result.outcome === 'preflight-failed') { + void notifyApplyFailure({ + outcome: 'preflight-failed', targetTag, reason: result.reason, + }); + } else if (result.outcome === 'rolled-back') { + void notifyApplyFailure({ + outcome: 'rolled-back', targetTag, reason: 'rolled-back', + }); + } + if (responded) return; // already 202'd in onAccepted; nothing more to send. 
switch (result.outcome) { diff --git a/src/node/hooks/express/updateStatus.ts b/src/node/hooks/express/updateStatus.ts index 69d63d889f3..5fda647b0c3 100644 --- a/src/node/hooks/express/updateStatus.ts +++ b/src/node/hooks/express/updateStatus.ts @@ -8,6 +8,7 @@ import {evaluatePolicy} from '../../updater/UpdatePolicy'; import {compareSemver, isMajorBehind, isVulnerable} from '../../updater/versionCompare'; import {loadState} from '../../updater/state'; import {isHeld} from '../../updater/lock'; +import {nextWindowStart, parseWindow} from '../../updater/MaintenanceWindow'; let badgeCache: {value: 'severe' | 'vulnerable' | null; at: number} = {value: null, at: 0}; @@ -103,9 +104,19 @@ export const expressCreateServer = ( current, latest: state.latest.version, executionStatus: state.execution.status, + maintenanceWindow: settings.updates.maintenanceWindow, }) : null; const lockHeld = await isHeld(path.join(settings.root, 'var', 'update.lock')); + // Tier 4: surface the configured window + the next opening so the admin UI + // can render the picker and the "deferred until..." subtitle on the + // scheduled panel. Non-admin requests get null for both fields (the parsed + // window is operational config, not a public datum). + const parsedWindow = parseWindow(settings.updates.maintenanceWindow); + const maintenanceWindow = isAdmin ? parsedWindow : null; + const nextWindowOpensAt = isAdmin && parsedWindow && settings.updates.tier === 'autonomous' + ? 
nextWindowStart(new Date(), parsedWindow).toISOString() + : null; // The Tier 2 fields (execution, lastResult) carry diagnostic strings // built from git/pnpm stderr — environment-specific paths, error @@ -132,6 +143,9 @@ export const expressCreateServer = ( execution, lastResult, lockHeld, + // PR 4 additions: + maintenanceWindow, + nextWindowOpensAt, }); })); diff --git a/src/node/updater/MaintenanceWindow.ts b/src/node/updater/MaintenanceWindow.ts new file mode 100644 index 00000000000..1708be8a4e2 --- /dev/null +++ b/src/node/updater/MaintenanceWindow.ts @@ -0,0 +1,105 @@ +/** + * Maintenance-window math for Tier 4 (autonomous updates). + * + * Pure — no I/O, no log4js, no globals beyond `Date`. Imported by: + * - `UpdatePolicy.ts` (canAutonomous gate) + * - `Scheduler.ts` (snap scheduledFor to the next window opening, defer fires) + * - `index.ts` (compute nextWindowOpensAt for /admin/update/status) + * - admin UI picker (validation) + * + * Time semantics + * -------------- + * A window is a pair of HH:MM wall-clock times plus a `tz` selector. For + * `tz: 'utc'`, comparisons use `getUTCHours/Minutes` and `Date.UTC(...)`. For + * `tz: 'local'`, they use the host's local wall clock via the standard `Date` + * constructor. `nextWindowStart` therefore returns a `Date` whose wall-clock + * components in the configured tz equal `window.start` — DST transitions are + * absorbed by the JS Date constructor's normalization (a 02:30 window-start on + * a spring-forward day silently lands at 03:30 local because 02:30 does not + * exist; documented behavior, not a bug). + * + * Cross-midnight windows are supported (`end < start` means "wraps past + * 00:00"). The `end` minute is exclusive in both same-day and cross-midnight + * cases — a `22:00-02:00` window matches `[22:00, 24:00) ∪ [00:00, 02:00)`. + */ + +export interface MaintenanceWindow { + /** Wall-clock start in `HH:MM` (24h). */ + start: string; + /** Wall-clock end in `HH:MM` (24h). Exclusive. 
*/ + end: string; + /** Whether `start`/`end` are read against UTC or the host's local clock. */ + tz: 'local' | 'utc'; +} + +const HHMM = /^([01]\d|2[0-3]):([0-5]\d)$/; + +const toMinutes = (hhmm: string): number | null => { + const m = HHMM.exec(hhmm); + if (!m) return null; + return Number(m[1]) * 60 + Number(m[2]); +}; + +/** + * Parse and validate a raw value (typically from `settings.json`) into a + * `MaintenanceWindow`. Returns `null` for any structural or format failure — + * callers should treat that as "tier 4 disabled, fall back to tier 3". + */ +export const parseWindow = (raw: unknown): MaintenanceWindow | null => { + if (!raw || typeof raw !== 'object') return null; + const r = raw as Record; + if (typeof r.start !== 'string' || typeof r.end !== 'string') return null; + if (r.tz !== 'local' && r.tz !== 'utc') return null; + const s = toMinutes(r.start); + const e = toMinutes(r.end); + if (s == null || e == null) return null; + if (s === e) return null; + return {start: r.start, end: r.end, tz: r.tz}; +}; + +const wallMinutes = (now: Date, tz: MaintenanceWindow['tz']): number => ( + tz === 'utc' + ? now.getUTCHours() * 60 + now.getUTCMinutes() + : now.getHours() * 60 + now.getMinutes() +); + +/** + * `true` iff `now`'s wall-clock minute is within `[start, end)` in the window's + * tz. Cross-midnight windows wrap at 24:00 — see file header for the exact set. + */ +export const inWindow = (now: Date, window: MaintenanceWindow): boolean => { + const s = toMinutes(window.start); + const e = toMinutes(window.end); + if (s == null || e == null || s === e) return false; + const m = wallMinutes(now, window.tz); + return s < e ? (m >= s && m < e) : (m >= s || m < e); +}; + +const buildAt = (year: number, month: number, day: number, mins: number, + tz: MaintenanceWindow['tz']): Date => { + const h = Math.floor(mins / 60); + const mm = mins % 60; + return tz === 'utc' + ? 
new Date(Date.UTC(year, month, day, h, mm, 0, 0)) + : new Date(year, month, day, h, mm, 0, 0); +}; + +/** + * Smallest `Date` `t` such that `t > now` and `t`'s wall-clock equals + * `window.start` in the window's tz. Used by Scheduler to snap a scheduledFor + * that lands outside the window forward to the next opening. + * + * If `now` is *inside* the window, the next opening is tomorrow — we don't + * collapse to `now`. Fire-now is gated by `inWindow`, not this function. + */ +export const nextWindowStart = (now: Date, window: MaintenanceWindow): Date => { + const s = toMinutes(window.start); + if (s == null) return now; + const isUtc = window.tz === 'utc'; + const year = isUtc ? now.getUTCFullYear() : now.getFullYear(); + const month = isUtc ? now.getUTCMonth() : now.getMonth(); + const day = isUtc ? now.getUTCDate() : now.getDate(); + const todayStart = buildAt(year, month, day, s, window.tz); + if (todayStart.getTime() > now.getTime()) return todayStart; + return buildAt(year, month, day + 1, s, window.tz); +}; diff --git a/src/node/updater/Notifier.ts b/src/node/updater/Notifier.ts index f37748a7c40..af560ad90cd 100644 --- a/src/node/updater/Notifier.ts +++ b/src/node/updater/Notifier.ts @@ -13,7 +13,14 @@ export interface NotifierInput { now: Date; } -export type EmailKind = 'severe' | 'vulnerable' | 'vulnerable-new-release' | 'grace-start'; +export type EmailKind = + | 'severe' + | 'vulnerable' + | 'vulnerable-new-release' + | 'grace-start' + | 'update-preflight-failed' + | 'update-rolled-back' + | 'update-rollback-failed'; export interface PlannedEmail { kind: EmailKind; @@ -86,3 +93,72 @@ export const decideEmails = (input: NotifierInput): NotifierResult => { return {toSend, newState}; }; + +export type FailureOutcome = + | 'preflight-failed' + | 'rolled-back' + | 'rollback-failed'; + +export interface OutcomeEmailInput { + adminEmail: string | null; + outcome: FailureOutcome; + /** Free-text reason string from `ApplyResult.reason` (or RollbackHandler). 
*/ + reason: string; + /** Tag the failed apply was targeting. */ + targetTag: string; + /** Currently-running Etherpad version (so the admin sees what's live now). */ + currentVersion: string; + /** Email-state slice from UpdateState. */ + state: EmailSendLog; +} + +/** + * Decide whether to email about a non-success apply outcome. Pure — returns + * the planned email + new dedupe state; does not send. + * + * Dedupe key: `<outcome>:<targetTag>`. Same outcome on the same tag (e.g. + * a retry loop that keeps failing `pnpm install` for v2.7.6) emits one + * email. A different outcome OR a different tag resets the dedupe key and + * fires a new email. + * + * `rollback-failed` always fires (overrides dedupe) — it's the terminal + * state that needs human intervention and the admin must learn about it + * even if a previous transient failure happened to share its key. + */ +export const decideOutcomeEmail = (input: OutcomeEmailInput): NotifierResult => { + const {adminEmail, outcome, reason, targetTag, currentVersion, state} = input; + if (!adminEmail) return {toSend: [], newState: state}; + + const key = `${outcome}:${targetTag}`; + const isTerminal = outcome === 'rollback-failed'; + if (!isTerminal && state.lastFailureKey === key) { + return {toSend: [], newState: state}; + } + + const kind: EmailKind = + outcome === 'preflight-failed' ? 'update-preflight-failed' + : outcome === 'rolled-back' ? 'update-rolled-back' + : 'update-rollback-failed'; + + const titleByKind: Record = { + 'update-preflight-failed': + `[Etherpad] Auto-update to ${targetTag} blocked at preflight`, + 'update-rolled-back': + `[Etherpad] Auto-update to ${targetTag} rolled back`, + 'update-rollback-failed': + `[Etherpad] Auto-update FAILED and could not be rolled back — manual intervention required`, + }; + + const bodyTail = isTerminal + ? ' Visit /admin/update and POST /admin/update/acknowledge after restoring a working install.' 
+ : ' Visit /admin/update for details.'; + + const body = + `Etherpad attempted to auto-update to ${targetTag} but failed: ${reason}.\n` + + `The running version is ${currentVersion}.${bodyTail}`; + + return { + toSend: [{kind, subject: titleByKind[kind], body}], + newState: {...state, lastFailureKey: key}, + }; +}; diff --git a/src/node/updater/Scheduler.ts b/src/node/updater/Scheduler.ts index 67c6ec8e73b..dd97af27546 100644 --- a/src/node/updater/Scheduler.ts +++ b/src/node/updater/Scheduler.ts @@ -1,5 +1,6 @@ -import {EmailSendLog, ExecutionStatus, PolicyResult, ReleaseInfo, UpdateState} from './types'; +import {EmailSendLog, ExecutionStatus, MaintenanceWindow, PolicyResult, ReleaseInfo, UpdateState} from './types'; import {PlannedEmail} from './Notifier'; +import {inWindow, nextWindowStart} from './MaintenanceWindow'; export interface DecideScheduleInput { state: UpdateState; @@ -9,6 +10,13 @@ export interface DecideScheduleInput { current: string; preApplyGraceMinutes: number; adminEmail: string | null; + /** + * Tier 4 only — when `policy.canAutonomous` is true, the scheduler snaps + * `scheduledFor` forward to the next window opening (if it would otherwise + * land outside the window) and `decideTriggerApply` defers fires that the + * window has closed for. Ignored when `canAutonomous === false`. + */ + maintenanceWindow?: MaintenanceWindow | null; } export type SchedulerDecision = @@ -48,7 +56,10 @@ const clampGrace = (m: number): number => { * email when `adminEmail` is set and `email.graceStartTag !== latest.tag`. 
*/ export const decideSchedule = (input: DecideScheduleInput): SchedulerDecision => { - const {state, now, policy, latest, current, preApplyGraceMinutes, adminEmail} = input; + const { + state, now, policy, latest, current, preApplyGraceMinutes, adminEmail, + maintenanceWindow, + } = input; const status = state.execution.status; if (!latest) return {action: 'nothing'}; @@ -66,7 +77,14 @@ export const decideSchedule = (input: DecideScheduleInput): SchedulerDecision => } const graceMs = clampGrace(preApplyGraceMinutes) * 60 * 1000; - const scheduledFor = new Date(now.getTime() + graceMs).toISOString(); + let scheduledForDate = new Date(now.getTime() + graceMs); + // Tier 4: snap forward to the next opening if grace lands outside the window. + if (policy.canAutonomous && maintenanceWindow) { + if (!inWindow(scheduledForDate, maintenanceWindow)) { + scheduledForDate = nextWindowStart(scheduledForDate, maintenanceWindow); + } + } + const scheduledFor = scheduledForDate.toISOString(); const newExecution = { status: 'scheduled' as const, targetTag: latest.tag, @@ -92,7 +110,8 @@ export const decideSchedule = (input: DecideScheduleInput): SchedulerDecision => export type TriggerApplyDecision = | {action: 'fire'} | {action: 'abort'; reason: string} - | {action: 'clear-schedule'; reason: string}; + | {action: 'clear-schedule'; reason: string} + | {action: 'defer'; nextStart: string; reason: 'outside-maintenance-window'}; /** * Decide whether the scheduler's timer-fire callback should actually run the @@ -100,10 +119,20 @@ export type TriggerApplyDecision = * arming-to-firing has a long delay (the grace window) during which the * admin can cancel, click Apply now, or flip the tier. SchedulerRunnerDeps * documents this contract; this helper is the canonical implementation. 
+ * + * Tier 4: when `policy.canAutonomous` is true and `now` is outside the + * configured `maintenanceWindow`, returns `{action: 'defer'}` so the runner + * persists a new `scheduledFor = nextStart` and re-arms. */ export const decideTriggerApply = ({ - state, targetTag, policy, -}: {state: UpdateState; targetTag: string; policy: PolicyResult}): TriggerApplyDecision => { + state, targetTag, policy, now, maintenanceWindow, +}: { + state: UpdateState; + targetTag: string; + policy: PolicyResult; + now?: Date; + maintenanceWindow?: MaintenanceWindow | null; +}): TriggerApplyDecision => { if (state.execution.status !== 'scheduled') { return {action: 'abort', reason: `state=${state.execution.status}`}; } @@ -112,6 +141,13 @@ export const decideTriggerApply = ({ } if (!state.latest) return {action: 'abort', reason: 'no-latest'}; if (!policy.canAuto) return {action: 'clear-schedule', reason: policy.reason || 'policy-denied'}; + if (policy.canAutonomous && maintenanceWindow && now && !inWindow(now, maintenanceWindow)) { + return { + action: 'defer', + nextStart: nextWindowStart(now, maintenanceWindow).toISOString(), + reason: 'outside-maintenance-window', + }; + } return {action: 'fire'}; }; diff --git a/src/node/updater/UpdatePolicy.ts b/src/node/updater/UpdatePolicy.ts index c9ace999690..ba36905d30e 100644 --- a/src/node/updater/UpdatePolicy.ts +++ b/src/node/updater/UpdatePolicy.ts @@ -1,5 +1,6 @@ import {compareSemver} from './versionCompare'; -import {InstallMethod, PolicyResult, Tier} from './types'; +import {parseWindow} from './MaintenanceWindow'; +import {InstallMethod, MaintenanceWindow, PolicyResult, Tier} from './types'; // For PR 1 (notify only) the writable list contains only 'git'. // PR 2+ may add 'npm' here as the executor learns to handle that path. @@ -17,19 +18,29 @@ export interface PolicyInput { * intervention the terminal state requires. */ executionStatus?: string; + /** + * Configured maintenance window from `updates.maintenanceWindow`. 
Tier 4 + * requires a non-null, parse-valid window. When null or malformed, + * canAutonomous degrades to false with a reason of + * `maintenance-window-missing` / `maintenance-window-invalid`; the other + * permissions still resolve as if tier were `auto`. + */ + maintenanceWindow?: MaintenanceWindow | unknown | null; } /** * Decide which update tiers are allowed under the given (installMethod, tier, - * current, latest, executionStatus). Pure function — no I/O. The single source - * of truth for "what's allowed in this environment." + * current, latest, executionStatus, maintenanceWindow). Pure function — no I/O. + * The single source of truth for "what's allowed in this environment." * * `reason` is one of: * 'tier-off' | 'up-to-date' | 'install-method-not-writable' - * | 'rollback-failed-terminal' | 'ok'. + * | 'rollback-failed-terminal' + * | 'maintenance-window-missing' | 'maintenance-window-invalid' + * | 'ok'. */ export const evaluatePolicy = ({ - installMethod, tier, current, latest, executionStatus, + installMethod, tier, current, latest, executionStatus, maintenanceWindow, }: PolicyInput): PolicyResult => { if (tier === 'off') { return {canNotify: false, canManual: false, canAuto: false, canAutonomous: false, reason: 'tier-off'}; @@ -46,11 +57,24 @@ export const evaluatePolicy = ({ } const terminal = executionStatus === 'rollback-failed'; - return { - canNotify, - canManual: tier === 'manual' || tier === 'auto' || tier === 'autonomous', - canAuto: !terminal && (tier === 'auto' || tier === 'autonomous'), - canAutonomous: !terminal && tier === 'autonomous', - reason: terminal ? 
'rollback-failed-terminal' : 'ok', - }; + const canManual = tier === 'manual' || tier === 'auto' || tier === 'autonomous'; + const canAuto = !terminal && (tier === 'auto' || tier === 'autonomous'); + + let canAutonomous = false; + let windowReason: string | null = null; + if (!terminal && tier === 'autonomous') { + if (maintenanceWindow == null) { + windowReason = 'maintenance-window-missing'; + } else if (parseWindow(maintenanceWindow) == null) { + windowReason = 'maintenance-window-invalid'; + } else { + canAutonomous = true; + } + } + + const reason = terminal + ? 'rollback-failed-terminal' + : (windowReason ?? 'ok'); + + return {canNotify, canManual, canAuto, canAutonomous, reason}; }; diff --git a/src/node/updater/applyPipeline.ts b/src/node/updater/applyPipeline.ts index aa6eaa8f67e..5a200a18a37 100644 --- a/src/node/updater/applyPipeline.ts +++ b/src/node/updater/applyPipeline.ts @@ -1,11 +1,11 @@ import {UpdateState} from './types'; -import {PreflightResult, PreflightReason} from './preflight'; +import {PreflightResult} from './preflight'; import {ExecutorResult} from './UpdateExecutor'; import {Drainer, DrainBroadcastKey} from './SessionDrainer'; export type ApplyOutcome = | {outcome: 'pending-verification'} - | {outcome: 'preflight-failed'; reason: PreflightReason} + | {outcome: 'preflight-failed'; reason: string} | {outcome: 'cancelled'} | {outcome: 'lock-held'} | {outcome: 'busy'; status: string} @@ -89,13 +89,17 @@ export const applyUpdate = async ( const pf = await deps.runPreflight(targetTag); if (!pf.ok) { const at = deps.now().toISOString(); + // Append the optional `detail` (e.g. "target requires Node >=26.0.0, + // running 25.0.0" for node-engine-mismatch) so the admin UI shows a + // version-specific message without requiring a separate API field. + const reasonStr = pf.detail ? 
`${pf.reason}: ${pf.detail}` : pf.reason; await deps.saveState({ ...preState, - execution: {status: 'preflight-failed', targetTag, reason: pf.reason, at}, - lastResult: {targetTag, fromSha: '', outcome: 'preflight-failed', reason: pf.reason, at}, + execution: {status: 'preflight-failed', targetTag, reason: reasonStr, at}, + lastResult: {targetTag, fromSha: '', outcome: 'preflight-failed', reason: reasonStr, at}, }); - deps.appendLog(`[${at}] PREFLIGHT_FAILED ${pf.reason}`); - return {outcome: 'preflight-failed', reason: pf.reason}; + deps.appendLog(`[${at}] PREFLIGHT_FAILED ${reasonStr}`); + return {outcome: 'preflight-failed', reason: reasonStr}; } // Re-load state after preflight: the cancel endpoint can flip execution diff --git a/src/node/updater/index.ts b/src/node/updater/index.ts index 99690c769b0..0b6bcc7f7bc 100644 --- a/src/node/updater/index.ts +++ b/src/node/updater/index.ts @@ -8,10 +8,11 @@ import {checkLatestRelease, realFetcher} from './VersionChecker'; import {loadState, saveState} from './state'; import {isMajorBehind, isVulnerable} from './versionCompare'; import {evaluatePolicy} from './UpdatePolicy'; -import {decideEmails} from './Notifier'; +import {decideEmails, decideOutcomeEmail, FailureOutcome} from './Notifier'; import {checkPendingVerification, CheckResult, RollbackDeps, performRollback} from './RollbackHandler'; import {executeUpdate, SpawnFn} from './UpdateExecutor'; import {createSchedulerRunner, decideSchedule, decideTriggerApply, SchedulerRunner} from './Scheduler'; +import {parseWindow} from './MaintenanceWindow'; import {applyUpdate, ApplyPipelineDeps} from './applyPipeline'; import {acquireLock, releaseLock} from './lock'; import {runPreflight} from './preflight'; @@ -42,11 +43,71 @@ export const getCurrentState = async (): Promise => { export const getDetectedInstallMethod = () => detectedMethod; +/** + * Cached nodemailer transport. 
Built on first use when `settings.mail.host` is + * set; never imported when mail is disabled (keeps boot costs predictable for + * installs that don't care about outbound mail). + * + * The cache is keyed on the full set of SMTP options that `buildTransport` + * consumes (host, port, secure, auth). `reloadSettings()` can mutate any of + * these at runtime, so a host-only key would silently keep using a stale + * transport when an operator rotates credentials or moves to a different + * port without changing host. + */ +let transportCache: {key: string; transporter: {sendMail: (m: any) => Promise}} | null = null; + +/** + * Stable string key derived from the SMTP options `buildTransport` consumes. + * Exported as `_internal` so tests can verify that runtime mutations to + * `port`/`secure`/`auth` (without a host change) actually invalidate the + * cache — a regression caught by Qodo on PR #7753. + */ +export const smtpTransportKey = (mail: { + host?: string | null; + port?: number | string | null; + secure?: boolean | null; + auth?: unknown; +}): string => JSON.stringify({ + host: mail.host ?? null, + port: Number(mail.port) || 587, + secure: !!mail.secure, + auth: mail.auth ?? null, +}); + +const buildTransport = async (host: string) => { + const {default: nodemailer} = await import('nodemailer'); + return nodemailer.createTransport({ + host, + port: Number(settings.mail.port) || 587, + secure: !!settings.mail.secure, + auth: settings.mail.auth ?? undefined, + }); +}; + const sendEmailViaSmtp = async (to: string, subject: string, body: string): Promise => { - // Etherpad core has no built-in SMTP. PR 1 ships the dedupe machinery without an actual sender; - // subsequent PRs can wire nodemailer or rely on a notification plugin. - logger.info(`(would send email) to=${to} subject="${subject}"`); - void body; + const host = settings.mail.host; + if (!host || !settings.mail.from) { + // Mail not configured. 
Log so operators running the runbook can confirm + // the Notifier fired even without delivery, and the dedupe state still + // advances so we don't re-evaluate the same trigger every tick. + logger.info(`(would send email) to=${to} subject="${subject}"`); + return; + } + const key = smtpTransportKey(settings.mail); + if (!transportCache || transportCache.key !== key) { + transportCache = {key, transporter: await buildTransport(host)}; + } + try { + await transportCache.transporter.sendMail({ + from: settings.mail.from, to, subject, text: body, + }); + logger.info(`email sent to=${to} subject="${subject}"`); + } catch (err) { + // Never throw out of the sender — a transient SMTP failure must not + // poison the surrounding updater state machine. The admin UI banner + // is still the source of truth for the underlying condition. + logger.warn(`email send failed: ${(err as Error).message}`); + } }; const performCheck = async (): Promise => { @@ -95,6 +156,7 @@ const performCheck = async (): Promise => { tier: settings.updates.tier, current, latest: state.latest.version, + maintenanceWindow: settings.updates.maintenanceWindow, }); if (policy.canNotify) { const decision = decideEmails({ @@ -115,6 +177,7 @@ const performCheck = async (): Promise => { } // Tier 3 scheduler pass: decide whether to schedule, reschedule, or cancel. + // Tier 4 snap-forward to next maintenance window is layered in here too. if (state.latest && scheduler) { const current = getEpVersion(); const policy = evaluatePolicy({ @@ -123,12 +186,14 @@ const performCheck = async (): Promise => { current, latest: state.latest.version, executionStatus: state.execution.status, + maintenanceWindow: settings.updates.maintenanceWindow, }); const decision = decideSchedule({ state, now, policy, latest: state.latest, current, preApplyGraceMinutes: Number(settings.updates.preApplyGraceMinutes) || 0, adminEmail: settings.adminEmail, + maintenanceWindow: policy.canAutonomous ? 
parseWindow(settings.updates.maintenanceWindow) : null, }); if (decision.action === 'schedule') { state.execution = decision.newExecution; @@ -217,6 +282,7 @@ const buildSchedulerApplyDeps = (): ApplyPipelineDeps => ({ diskSpaceMinMB: Number(settings.updates.diskSpaceMinMB) || 500, requireSignature: settings.updates.requireSignature, trustedKeysPath: settings.updates.trustedKeysPath, + currentNodeVersion: process.versions.node, }, { installMethod: detectedMethod, @@ -256,6 +322,26 @@ const buildSchedulerApplyDeps = (): ApplyPipelineDeps => ({ requireSignature: settings.updates.requireSignature, trustedKeysPath: settings.updates.trustedKeysPath, }), + readTargetEnginesNode: (tagName: string) => new Promise((resolve) => { + // Read the target tag's package.json *without* mutating the working + // tree: `git show <tag>:package.json` writes to stdout only. Treat + // any failure (missing tag, missing file, malformed JSON, missing + // engines.node) as "no constraint" — preflight already covers + // missing-tag separately; we don't want to gate updates on a + // package.json shape that older releases predate. + const c = spawn('git', ['show', `${tagName}:package.json`], + {cwd: settings.root, stdio: ['ignore', 'pipe', 'ignore']}); + let out = ''; + c.stdout.on('data', (b) => { out += b.toString(); }); + c.on('close', () => { + try { + const pkg = JSON.parse(out); + const range = pkg?.engines?.node; + resolve(typeof range === 'string' && range.trim().length > 0 ? range : null); + } catch { resolve(null); } + }); + c.on('error', () => resolve(null)); + }), }, ), createDrainer: (opts) => createDrainer(opts), @@ -299,6 +385,57 @@ const buildSchedulerApplyDeps = (): ApplyPipelineDeps => ({ /** Allow the cancel handler to drop the pending scheduler timer. */ export const cancelScheduler = (): void => { scheduler?.cancel(); }; +/** + * Map an `applyUpdate` outcome to a `FailureOutcome` for the Notifier, or + * `null` when the outcome doesn't warrant an admin email.
We deliberately + * do NOT email on `cancelled` (the admin did it themselves), `busy` (UI + * already surfaced the in-flight state), `lock-held`, `invalid-tag`, or + * `no-known-latest` (all transient operational conditions surfaced via the + * banner already). The terminal `rollback-failed` is emitted separately + * from RollbackHandler's own path — applyUpdate's `rolled-back` covers the + * auto-recovered case. + */ +const failureOutcomeFromApplyResult = ( + outcome: string, +): FailureOutcome | null => { + if (outcome === 'preflight-failed') return 'preflight-failed'; + if (outcome === 'rolled-back') return 'rolled-back'; + return null; +}; + +/** + * Load state, run Notifier.decideOutcomeEmail for the given failure, send + * the planned mail (best-effort), and persist the updated dedupe key. Never + * throws — a transient SMTP issue must not poison the surrounding apply + * flow's bookkeeping. + */ +export const notifyApplyFailure = async (params: { + outcome: FailureOutcome; + reason: string; + targetTag: string; +}): Promise => { + try { + const state = await loadState(stateFilePath()); + const decision = decideOutcomeEmail({ + adminEmail: settings.adminEmail, + outcome: params.outcome, + reason: params.reason, + targetTag: params.targetTag, + currentVersion: getEpVersion(), + state: state.email, + }); + if (decision.toSend.length === 0) return; + for (const e of decision.toSend) { + if (settings.adminEmail) { + await sendEmailViaSmtp(settings.adminEmail, e.subject, e.body); + } + } + await saveState(stateFilePath(), {...state, email: decision.newState}); + } catch (err) { + logger.warn(`notifyApplyFailure: ${(err as Error).message}`); + } +}; + /** * Timer-fire callback. 
Re-reads persisted state and re-evaluates policy * *before* invoking applyUpdate so a last-moment cancel, a manual Apply now @@ -318,9 +455,14 @@ const schedulerTriggerApply = async (targetTag: string): Promise => { current: getEpVersion(), latest: state.latest.version, executionStatus: state.execution.status, + maintenanceWindow: settings.updates.maintenanceWindow, }) : {canNotify: false, canManual: false, canAuto: false, canAutonomous: false, reason: 'no-latest'}; - const decision = decideTriggerApply({state, targetTag, policy}); + const window = policy.canAutonomous ? parseWindow(settings.updates.maintenanceWindow) : null; + const decision = decideTriggerApply({ + state, targetTag, policy, + now: new Date(), maintenanceWindow: window, + }); if (decision.action === 'abort') { logger.info(`scheduler fired for ${targetTag} but aborting (${decision.reason})`); return; @@ -332,8 +474,27 @@ const schedulerTriggerApply = async (targetTag: string): Promise => { await saveState(stateFilePath(), {...state, execution: {status: 'idle'}}); return; } + if (decision.action === 'defer') { + // Tier 4: fire-time was outside the window. Re-arm for the next opening + // and persist the new scheduledFor so a restart in the gap rehydrates. + logger.info(`scheduler deferred ${targetTag} to next maintenance window at ${decision.nextStart}`); + const sched = state.execution.status === 'scheduled' ? state.execution : null; + if (sched) { + await saveState(stateFilePath(), { + ...state, + execution: {...sched, scheduledFor: decision.nextStart}, + }); + scheduler?.arm({targetTag, scheduledFor: decision.nextStart}); + } + return; + } const result = await applyUpdate({targetTag, deps: buildSchedulerApplyDeps()}); logger.info(`scheduler apply finished: ${result.outcome}`); + const failureKind = failureOutcomeFromApplyResult(result.outcome); + if (failureKind) { + const reason = (result as {reason?: string}).reason ?? 
failureKind; + await notifyApplyFailure({outcome: failureKind, reason, targetTag}); + } } catch (err) { logger.warn(`scheduler apply failed: ${(err as Error).message}`); } @@ -354,6 +515,26 @@ export const expressCreateServer = async (): Promise => { const state = await getCurrentState(); pendingVerification = checkPendingVerification(state, getRollbackDeps()); + // Boot-time failure notification. If a previous run produced a failure + // outcome whose admin email we haven't already sent (lastFailureKey + // dedupe), fire it now. Covers: + // - health-check timeout rollback on the previous boot + // - crash-loop forced rollback (detected on a later boot) + // - preflight-failed where we never got to send (e.g. process kill) + // - rollback-failed terminal that the operator hasn't acknowledged + // Fire-and-forget — the rest of boot must proceed regardless. + const failureOutcome = state.lastResult?.outcome === 'rolled-back' ? 'rolled-back' + : state.lastResult?.outcome === 'rollback-failed' ? 'rollback-failed' + : state.lastResult?.outcome === 'preflight-failed' ? 'preflight-failed' + : null; + if (failureOutcome && state.lastResult) { + void notifyApplyFailure({ + outcome: failureOutcome, + targetTag: state.lastResult.targetTag, + reason: state.lastResult.reason ?? failureOutcome, + }); + } + // Tier 3: instantiate the scheduler unless updates are entirely disabled. // The runner is purely in-memory — the persisted state file is the source // of truth for "is something scheduled." 
On `tier: "off"` we explicitly diff --git a/src/node/updater/preflight.ts b/src/node/updater/preflight.ts index f0403e186b6..585e5f03025 100644 --- a/src/node/updater/preflight.ts +++ b/src/node/updater/preflight.ts @@ -1,3 +1,4 @@ +import semver from 'semver'; import {InstallMethod} from './types'; import type {VerifyResult} from './trustedKeys'; @@ -8,13 +9,20 @@ export type PreflightReason = | 'pnpm-not-found' | 'lock-held' | 'remote-tag-missing' - | 'signature-verification-failed'; + | 'signature-verification-failed' + | 'node-engine-mismatch'; export interface PreflightInput { targetTag: string; diskSpaceMinMB: number; requireSignature: boolean; trustedKeysPath: string | null; + /** + * Running Node version (typically `process.versions.node`). Threaded + * through `input` rather than read from globals so the function stays + * fully testable without process mocking. + */ + currentNodeVersion: string; } export interface PreflightDeps { @@ -25,9 +33,16 @@ export interface PreflightDeps { lockHeld: () => Promise; remoteHasTag: (tag: string) => Promise; verifyTag: () => Promise; + /** + * Returns the `engines.node` field from the target tag's `package.json` + * without mutating the working tree. The implementation typically runs + * `git show <tag>:package.json` and parses the JSON. Returns `null` if + * the field is absent — that's treated as "no constraint, pass". + */ + readTargetEnginesNode: (tag: string) => Promise; } -export type PreflightResult = {ok: true} | {ok: false; reason: PreflightReason}; +export type PreflightResult = {ok: true} | {ok: false; reason: PreflightReason; detail?: string}; const WRITABLE_METHODS: ReadonlySet> = new Set(['git']); @@ -35,6 +50,11 @@ const WRITABLE_METHODS: ReadonlySet> = new Set([' * Sequenced preflight: each check is fast and reads the world. Order matters — * cheap, definitive failures (install method) run before slow ones (network * tag lookup, gpg). The first failure short-circuits.
+ * + * The Node-engine check runs *after* signature verification: we want the + * range to come from a trusted tag. It runs *before* anything mutates the + * working tree (the executor does the first `git checkout` after we return + * ok), so a failure leaves the system exactly as it was — no rollback needed. */ export const runPreflight = async ( input: PreflightInput, @@ -50,5 +70,15 @@ export const runPreflight = async ( if (!await deps.remoteHasTag(input.targetTag)) return {ok: false, reason: 'remote-tag-missing'}; const sig = await deps.verifyTag(); if (!sig.ok) return {ok: false, reason: 'signature-verification-failed'}; + + const range = await deps.readTargetEnginesNode(input.targetTag); + if (range && !semver.satisfies(input.currentNodeVersion, range, {includePrerelease: true})) { + return { + ok: false, + reason: 'node-engine-mismatch', + detail: `target requires Node ${range}, running ${input.currentNodeVersion}`, + }; + } + return {ok: true}; }; diff --git a/src/node/updater/state.ts b/src/node/updater/state.ts index f539a7f1408..64492763aef 100644 --- a/src/node/updater/state.ts +++ b/src/node/updater/state.ts @@ -88,14 +88,16 @@ const isValidVulnerableBelow = (v: unknown): boolean => { const isValidEmail = (v: unknown): boolean => { if (!isPlainObject(v)) return false; - // graceStartTag was added in Tier 3. Treat as optional for backwards - // compatibility with state files written by Tier 1/2 installs; loadState - // backfills the missing field to null. If present, must be string|null. + // graceStartTag (Tier 3) and lastFailureKey (Tier 4) are both optional for + // backwards compatibility with state files written by earlier installs; + // loadState backfills missing fields to null. If present, must be string|null. 
const graceOk = v.graceStartTag === undefined || isStringOrNull(v.graceStartTag); + const failOk = v.lastFailureKey === undefined || isStringOrNull(v.lastFailureKey); return isStringOrNull(v.severeAt) && isStringOrNull(v.vulnerableAt) && isStringOrNull(v.vulnerableNewReleaseTag) - && graceOk; + && graceOk + && failOk; }; // Validate the full shape so loadState() actually delivers on its "safely @@ -145,7 +147,11 @@ export const loadState = async (filePath: string): Promise => { return { ...structuredClone(EMPTY_STATE), ...partial, - email: {...email, graceStartTag: email.graceStartTag ?? null}, + email: { + ...email, + graceStartTag: email.graceStartTag ?? null, + lastFailureKey: email.lastFailureKey ?? null, + }, execution: partial.execution ?? structuredClone(EMPTY_STATE.execution), bootCount: partial.bootCount ?? 0, lastResult: partial.lastResult ?? null, diff --git a/src/node/updater/types.ts b/src/node/updater/types.ts index 01732eccc46..5ebf3c4cc4e 100644 --- a/src/node/updater/types.ts +++ b/src/node/updater/types.ts @@ -2,6 +2,17 @@ export type InstallMethod = 'auto' | 'git' | 'docker' | 'npm' | 'managed'; export type Tier = 'off' | 'notify' | 'manual' | 'auto' | 'autonomous'; +/** + * Tier 4 (autonomous) maintenance window. `start`/`end` are HH:MM (24h) in the + * configured `tz`. `end` is exclusive; `end < start` denotes a cross-midnight + * window. See `MaintenanceWindow.ts` for the parser/predicate implementation. + */ +export interface MaintenanceWindow { + start: string; + end: string; + tz: 'local' | 'utc'; +} + /** null = up-to-date (or not yet checked); 'severe' = at least one major version behind; 'vulnerable' = matched a vulnerable-below directive. */ export type OutdatedLevel = null | 'severe' | 'vulnerable'; @@ -45,6 +56,13 @@ export interface EmailSendLog { vulnerableNewReleaseTag: string | null; /** Tag of the most recent release for which we sent a Tier 3 `grace-start` email. 
*/ graceStartTag: string | null; + /** + * Dedupe key for `update-rolled-back` / `update-preflight-failed` emails. + * Stores the `<outcome>:<targetTag>` of the last failure we emailed about so a + * retry-loop (e.g. repeated `pnpm install` failures on the same release) + * doesn't fire one email per attempt. Cleared when the next outcome differs. + */ + lastFailureKey: string | null; } /** @@ -123,6 +141,7 @@ export const EMPTY_STATE: UpdateState = { vulnerableAt: null, vulnerableNewReleaseTag: null, graceStartTag: null, + lastFailureKey: null, }, execution: {status: 'idle'}, bootCount: 0, diff --git a/src/node/utils/Settings.ts b/src/node/utils/Settings.ts index 230ffcbcd12..48d78b34c2d 100644 --- a/src/node/utils/Settings.ts +++ b/src/node/utils/Settings.ts @@ -345,11 +345,30 @@ export type SettingsType = { requireSignature: boolean, /** Override the OS keyring location (passed to git verify-tag via $GNUPGHOME). */ trustedKeysPath: string | null, + /** + * Tier 4: nightly window during which the scheduler is allowed to fire. + * Null = tier 4 disabled (canAutonomous is denied with reason + * `maintenance-window-missing`). Shape validated at boot by `parseWindow`. + */ + maintenanceWindow: {start: string; end: string; tz: 'local' | 'utc'} | null, }, adminOpenAPI: { enabled: boolean, }, adminEmail: string | null, + /** + * SMTP transport for outbound admin notifications (updater + future + * features). Null `host` disables outbound mail — the Notifier still runs + * and dedupe state is updated, but messages only log `(would send email)`. + * `auth` is optional; omit for unauthenticated relays. + */ + mail: { + host: string | null; + port: number; + secure: boolean; + from: string | null; + auth: {user: string; pass: string} | null; + }, getPublicSettings: () => Pick, } @@ -547,6 +566,9 @@ const settings: SettingsType = { diskSpaceMinMB: 500, requireSignature: false, trustedKeysPath: null, + // Tier 4: night-window during which the scheduler may fire. Null disables tier 4 only.
+ // Example: { start: "03:00", end: "05:00", tz: "local" } or tz: "utc". + maintenanceWindow: null, }, /** * Admin OpenAPI document endpoint at /admin/openapi.json. @@ -565,6 +587,19 @@ const settings: SettingsType = { * Null disables outbound mail from the updater. */ adminEmail: null, + /** + * SMTP transport for outbound admin notifications. Null `host` keeps the + * legacy log-only behaviour. Set `host`+`from` (and optionally `auth`) to + * deliver via nodemailer. The dependency is lazy-loaded — installs without + * a mail.host pay no runtime cost. + */ + mail: { + host: null, + port: 587, + secure: false, + from: null, + auth: null, + }, /** * Whether certain shortcut keys are enabled for a user in the pad */ diff --git a/src/package.json b/src/package.json index 56d0ff93afc..c0676925087 100644 --- a/src/package.json +++ b/src/package.json @@ -65,6 +65,7 @@ "mssql": "^12.5.3", "mysql2": "^3.22.3", "nano": "^11.0.5", + "nodemailer": "^8.0.7", "oidc-provider": "9.8.3", "openapi-backend": "^5.16.1", "pdfkit": "^0.18.0", @@ -114,6 +115,7 @@ "@types/mime-types": "^3.0.1", "@types/mocha": "^10.0.9", "@types/node": "^25.8.0", + "@types/nodemailer": "^8.0.0", "@types/oidc-provider": "^9.5.0", "@types/pdfkit": "^0.17.6", "@types/semver": "^7.7.1", diff --git a/src/tests/backend-new/specs/updater/MaintenanceWindow.test.ts b/src/tests/backend-new/specs/updater/MaintenanceWindow.test.ts new file mode 100644 index 00000000000..1151d20e31e --- /dev/null +++ b/src/tests/backend-new/specs/updater/MaintenanceWindow.test.ts @@ -0,0 +1,130 @@ +import {describe, it, expect} from 'vitest'; +import { + parseWindow, + inWindow, + nextWindowStart, +} from '../../../../node/updater/MaintenanceWindow'; + +describe('parseWindow', () => { + it('accepts a valid same-day window with tz=local', () => { + expect(parseWindow({start: '03:00', end: '05:00', tz: 'local'})).toEqual({ + start: '03:00', end: '05:00', tz: 'local', + }); + }); + it('accepts a cross-midnight window', () => { + 
expect(parseWindow({start: '22:00', end: '02:00', tz: 'utc'})).toEqual({ + start: '22:00', end: '02:00', tz: 'utc', + }); + }); + it('rejects malformed start/end strings', () => { + expect(parseWindow({start: '3:00', end: '05:00', tz: 'local'})).toBeNull(); + expect(parseWindow({start: '03:60', end: '05:00', tz: 'local'})).toBeNull(); + expect(parseWindow({start: '24:00', end: '05:00', tz: 'local'})).toBeNull(); + expect(parseWindow({start: 'oops', end: '05:00', tz: 'local'})).toBeNull(); + }); + it('rejects start === end (zero-length window)', () => { + expect(parseWindow({start: '03:00', end: '03:00', tz: 'local'})).toBeNull(); + }); + it('rejects unknown tz', () => { + expect(parseWindow({start: '03:00', end: '05:00', tz: 'pacific'})).toBeNull(); + }); + it('rejects non-object / missing fields', () => { + expect(parseWindow(null)).toBeNull(); + expect(parseWindow('03:00-05:00')).toBeNull(); + expect(parseWindow({start: '03:00', tz: 'local'})).toBeNull(); + expect(parseWindow({})).toBeNull(); + }); +}); + +describe('inWindow — same-day windows, tz=utc', () => { + const w = {start: '03:00', end: '05:00', tz: 'utc' as const}; + it('inside the window', () => { + expect(inWindow(new Date('2026-05-15T03:30:00Z'), w)).toBe(true); + expect(inWindow(new Date('2026-05-15T03:00:00Z'), w)).toBe(true); + }); + it('outside before start', () => { + expect(inWindow(new Date('2026-05-15T02:59:59Z'), w)).toBe(false); + }); + it('exact end is excluded', () => { + expect(inWindow(new Date('2026-05-15T05:00:00Z'), w)).toBe(false); + }); + it('outside after end', () => { + expect(inWindow(new Date('2026-05-15T06:00:00Z'), w)).toBe(false); + }); +}); + +describe('inWindow — cross-midnight windows, tz=utc', () => { + const w = {start: '22:00', end: '02:00', tz: 'utc' as const}; + it('inside before midnight', () => { + expect(inWindow(new Date('2026-05-15T23:00:00Z'), w)).toBe(true); + }); + it('inside after midnight', () => { + expect(inWindow(new Date('2026-05-16T01:00:00Z'), 
w)).toBe(true); + }); + it('exact end is excluded', () => { + expect(inWindow(new Date('2026-05-16T02:00:00Z'), w)).toBe(false); + }); + it('outside in the daytime gap', () => { + expect(inWindow(new Date('2026-05-15T12:00:00Z'), w)).toBe(false); + expect(inWindow(new Date('2026-05-15T21:59:59Z'), w)).toBe(false); + }); +}); + +describe('inWindow — tz=local respects host wall clock', () => { + it('matches the host-local hour, not UTC', () => { + // Construct a Date from local components so the local hour is known + // regardless of the host TZ. + const localFour = new Date(2026, 4, 15, 4, 0, 0); // May 15 04:00 local + const w = {start: '03:00', end: '05:00', tz: 'local' as const}; + expect(inWindow(localFour, w)).toBe(true); + const localSix = new Date(2026, 4, 15, 6, 0, 0); + expect(inWindow(localSix, w)).toBe(false); + }); +}); + +describe('nextWindowStart — same-day, tz=utc', () => { + const w = {start: '03:00', end: '05:00', tz: 'utc' as const}; + it('before today\'s start returns today at start', () => { + expect(nextWindowStart(new Date('2026-05-15T01:00:00Z'), w).toISOString()) + .toBe('2026-05-15T03:00:00.000Z'); + }); + it('inside the window returns next day at start', () => { + expect(nextWindowStart(new Date('2026-05-15T03:30:00Z'), w).toISOString()) + .toBe('2026-05-16T03:00:00.000Z'); + }); + it('after today\'s end returns next day at start', () => { + expect(nextWindowStart(new Date('2026-05-15T06:00:00Z'), w).toISOString()) + .toBe('2026-05-16T03:00:00.000Z'); + }); +}); + +describe('nextWindowStart — cross-midnight, tz=utc', () => { + const w = {start: '22:00', end: '02:00', tz: 'utc' as const}; + it('before today\'s start returns today at start', () => { + expect(nextWindowStart(new Date('2026-05-15T10:00:00Z'), w).toISOString()) + .toBe('2026-05-15T22:00:00.000Z'); + }); + it('between midnight and end returns same-day start (today) since today\'s start has passed → tomorrow', () => { + // 01:00 is inside the window that started "yesterday at 
22:00". The next + // window-start ≥ now is *today* at 22:00. + expect(nextWindowStart(new Date('2026-05-16T01:00:00Z'), w).toISOString()) + .toBe('2026-05-16T22:00:00.000Z'); + }); + it('after today\'s start (inside the window) returns tomorrow', () => { + expect(nextWindowStart(new Date('2026-05-15T23:30:00Z'), w).toISOString()) + .toBe('2026-05-16T22:00:00.000Z'); + }); +}); + +describe('nextWindowStart — tz=local', () => { + it('returns a Date whose local components match start', () => { + const w = {start: '03:00', end: '05:00', tz: 'local' as const}; + const now = new Date(2026, 4, 15, 1, 0, 0); // May 15 01:00 local + const next = nextWindowStart(now, w); + expect(next.getFullYear()).toBe(2026); + expect(next.getMonth()).toBe(4); // May + expect(next.getDate()).toBe(15); + expect(next.getHours()).toBe(3); + expect(next.getMinutes()).toBe(0); + }); +}); diff --git a/src/tests/backend-new/specs/updater/Notifier.test.ts b/src/tests/backend-new/specs/updater/Notifier.test.ts index 8296ba9c3a7..e8bbff4af3f 100644 --- a/src/tests/backend-new/specs/updater/Notifier.test.ts +++ b/src/tests/backend-new/specs/updater/Notifier.test.ts @@ -1,5 +1,5 @@ import {describe, it, expect} from 'vitest'; -import {decideEmails, NotifierInput} from '../../../../node/updater/Notifier'; +import {decideEmails, decideOutcomeEmail, NotifierInput} from '../../../../node/updater/Notifier'; import {EMPTY_STATE} from '../../../../node/updater/types'; const base: NotifierInput = { @@ -93,3 +93,79 @@ describe('decideEmails', () => { expect(r.newState.vulnerableAt).toBe('2026-04-25T12:00:00.000Z'); }); }); + +describe('decideOutcomeEmail', () => { + const failureBase = { + adminEmail: 'ops@example.com', + reason: 'pnpm install exit 1', + targetTag: 'v2.7.6', + currentVersion: '2.7.5', + state: EMPTY_STATE.email, + }; + + it('does nothing when adminEmail is null', () => { + const r = decideOutcomeEmail({...failureBase, adminEmail: null, outcome: 'rolled-back'}); + expect(r.toSend).toEqual([]); 
+ expect(r.newState).toBe(failureBase.state); + }); + + it('emits update-rolled-back on first failure for a tag', () => { + const r = decideOutcomeEmail({...failureBase, outcome: 'rolled-back'}); + expect(r.toSend).toHaveLength(1); + expect(r.toSend[0].kind).toBe('update-rolled-back'); + expect(r.toSend[0].subject).toContain('v2.7.6'); + expect(r.toSend[0].body).toContain('pnpm install exit 1'); + expect(r.toSend[0].body).toContain('2.7.5'); + expect(r.newState.lastFailureKey).toBe('rolled-back:v2.7.6'); + }); + + it('emits update-preflight-failed for that outcome', () => { + const r = decideOutcomeEmail({...failureBase, outcome: 'preflight-failed', reason: 'node-engine-mismatch: target requires Node >=26'}); + expect(r.toSend[0].kind).toBe('update-preflight-failed'); + expect(r.toSend[0].body).toContain('node-engine-mismatch'); + expect(r.newState.lastFailureKey).toBe('preflight-failed:v2.7.6'); + }); + + it('emits update-rollback-failed on the terminal outcome', () => { + const r = decideOutcomeEmail({...failureBase, outcome: 'rollback-failed', reason: 'restore checkout exit 128'}); + expect(r.toSend[0].kind).toBe('update-rollback-failed'); + expect(r.toSend[0].subject).toContain('manual intervention'); + expect(r.toSend[0].body).toContain('/admin/update/acknowledge'); + }); + + it('dedupes the same outcome on the same tag (retry-loop guard)', () => { + const first = decideOutcomeEmail({...failureBase, outcome: 'rolled-back'}); + const second = decideOutcomeEmail({ + ...failureBase, outcome: 'rolled-back', state: first.newState, + }); + expect(second.toSend).toEqual([]); + // newState pointer unchanged when dedup hit. 
+ expect(second.newState).toBe(first.newState); + }); + + it('re-emits when the outcome differs on the same tag', () => { + const first = decideOutcomeEmail({...failureBase, outcome: 'preflight-failed'}); + const second = decideOutcomeEmail({ + ...failureBase, outcome: 'rolled-back', state: first.newState, + }); + expect(second.toSend).toHaveLength(1); + expect(second.newState.lastFailureKey).toBe('rolled-back:v2.7.6'); + }); + + it('re-emits when the same outcome happens on a different tag', () => { + const first = decideOutcomeEmail({...failureBase, outcome: 'rolled-back'}); + const second = decideOutcomeEmail({ + ...failureBase, targetTag: 'v2.7.7', outcome: 'rolled-back', state: first.newState, + }); + expect(second.toSend).toHaveLength(1); + expect(second.newState.lastFailureKey).toBe('rolled-back:v2.7.7'); + }); + + it('rollback-failed always fires (overrides dedupe — terminal state matters more than spam)', () => { + const first = decideOutcomeEmail({...failureBase, outcome: 'rollback-failed'}); + const second = decideOutcomeEmail({ + ...failureBase, outcome: 'rollback-failed', state: first.newState, + }); + expect(second.toSend).toHaveLength(1); + }); +}); diff --git a/src/tests/backend-new/specs/updater/Scheduler.test.ts b/src/tests/backend-new/specs/updater/Scheduler.test.ts index 8dbbbc66b3f..1591d79f8bf 100644 --- a/src/tests/backend-new/specs/updater/Scheduler.test.ts +++ b/src/tests/backend-new/specs/updater/Scheduler.test.ts @@ -352,3 +352,99 @@ describe('decideTriggerApply', () => { expect(d).toEqual({action: 'clear-schedule', reason: 'policy-denied'}); }); }); + +describe('Tier 4 — maintenance-window gating', () => { + const release: ReleaseInfo = { + tag: 'v2.0.1', version: '2.0.1', body: '', publishedAt: '2026-05-11T00:00:00.000Z', + prerelease: false, htmlUrl: 'https://example.com', + }; + const policyAutonomous: PolicyResult = { + canNotify: true, canManual: true, canAuto: true, canAutonomous: true, reason: 'ok', + }; + const window = {start: 
'03:00', end: '05:00', tz: 'utc' as const}; + + it('decideSchedule snaps scheduledFor forward to the next window opening', () => { + const state: UpdateState = {...EMPTY_STATE, latest: release}; + const d = decideSchedule({ + state, now: new Date('2026-05-11T10:00:00.000Z'), policy: policyAutonomous, + latest: release, current: '2.0.0', preApplyGraceMinutes: 15, adminEmail: null, + maintenanceWindow: window, + }); + expect(d.action).toBe('schedule'); + if (d.action === 'schedule') { + expect(d.newExecution.scheduledFor).toBe('2026-05-12T03:00:00.000Z'); + } + }); + + it('decideSchedule keeps scheduledFor at now+grace when grace lands inside the window', () => { + const state: UpdateState = {...EMPTY_STATE, latest: release}; + const d = decideSchedule({ + state, now: new Date('2026-05-11T03:30:00.000Z'), policy: policyAutonomous, + latest: release, current: '2.0.0', preApplyGraceMinutes: 15, adminEmail: null, + maintenanceWindow: window, + }); + expect(d.action).toBe('schedule'); + if (d.action === 'schedule') { + expect(d.newExecution.scheduledFor).toBe('2026-05-11T03:45:00.000Z'); + } + }); + + it('decideSchedule ignores the window when policy.canAutonomous is false', () => { + const state: UpdateState = {...EMPTY_STATE, latest: release}; + const d = decideSchedule({ + state, now: new Date('2026-05-11T10:00:00.000Z'), + policy: {...policyAutonomous, canAutonomous: false}, + latest: release, current: '2.0.0', preApplyGraceMinutes: 15, adminEmail: null, + maintenanceWindow: window, + }); + expect(d.action).toBe('schedule'); + if (d.action === 'schedule') { + // Standard tier 3 grace, no snap. 
+ expect(d.newExecution.scheduledFor).toBe('2026-05-11T10:15:00.000Z'); + } + }); + + it('decideTriggerApply defers when canAutonomous + outside window at fire time', () => { + const state: UpdateState = { + ...EMPTY_STATE, latest: release, + execution: {status: 'scheduled', targetTag: 'v2.0.1', + scheduledFor: '2026-05-11T03:00:00.000Z', startedAt: '2026-05-11T02:45:00.000Z'}, + }; + const d = decideTriggerApply({ + state, targetTag: 'v2.0.1', policy: policyAutonomous, + now: new Date('2026-05-11T10:00:00.000Z'), maintenanceWindow: window, + }); + expect(d.action).toBe('defer'); + if (d.action === 'defer') { + expect(d.nextStart).toBe('2026-05-12T03:00:00.000Z'); + expect(d.reason).toBe('outside-maintenance-window'); + } + }); + + it('decideTriggerApply fires when canAutonomous + inside window', () => { + const state: UpdateState = { + ...EMPTY_STATE, latest: release, + execution: {status: 'scheduled', targetTag: 'v2.0.1', + scheduledFor: '2026-05-11T03:00:00.000Z', startedAt: '2026-05-11T02:45:00.000Z'}, + }; + const d = decideTriggerApply({ + state, targetTag: 'v2.0.1', policy: policyAutonomous, + now: new Date('2026-05-11T03:30:00.000Z'), maintenanceWindow: window, + }); + expect(d).toEqual({action: 'fire'}); + }); + + it('decideSchedule re-uses graceStartTag dedupe across a defer/re-schedule cycle', () => { + const state: UpdateState = { + ...EMPTY_STATE, latest: release, + email: {...EMPTY_STATE.email, graceStartTag: 'v2.0.1'}, + }; + const d = decideSchedule({ + state, now: new Date('2026-05-11T10:00:00.000Z'), policy: policyAutonomous, + latest: release, current: '2.0.0', preApplyGraceMinutes: 15, + adminEmail: 'ops@example.com', maintenanceWindow: window, + }); + expect(d.action).toBe('schedule'); + if (d.action === 'schedule') expect(d.emails).toEqual([]); + }); +}); diff --git a/src/tests/backend-new/specs/updater/UpdatePolicy.test.ts b/src/tests/backend-new/specs/updater/UpdatePolicy.test.ts index 3eb74ef01bf..a7bda597c16 100644 --- 
a/src/tests/backend-new/specs/updater/UpdatePolicy.test.ts +++ b/src/tests/backend-new/specs/updater/UpdatePolicy.test.ts @@ -7,6 +7,10 @@ const baseInput = { tier: 'manual' as Tier, current: '2.7.1', latest: '2.7.2', + // Default to a valid window so tier-4 cases below can assert canAutonomous + // without also having to wire a window each time. The "no window" + "invalid + // window" cases set this explicitly. + maintenanceWindow: {start: '03:00', end: '05:00', tz: 'local' as const}, }; describe('evaluatePolicy', () => { @@ -93,3 +97,44 @@ describe('evaluatePolicy terminal-state gating', () => { expect(r.canAutonomous).toBe(true); }); }); + +describe('evaluatePolicy tier 4 — maintenance window gating', () => { + it('autonomous without a window degrades to canAuto only', () => { + const r = evaluatePolicy({ + ...baseInput, tier: 'autonomous', maintenanceWindow: null, + }); + expect(r.canManual).toBe(true); + expect(r.canAuto).toBe(true); + expect(r.canAutonomous).toBe(false); + expect(r.reason).toBe('maintenance-window-missing'); + }); + + it('autonomous with a malformed window degrades to canAuto only', () => { + const r = evaluatePolicy({ + ...baseInput, tier: 'autonomous', + maintenanceWindow: {start: 'oops', end: '05:00', tz: 'local'}, + }); + expect(r.canAutonomous).toBe(false); + expect(r.reason).toBe('maintenance-window-invalid'); + }); + + it('lower tiers ignore the maintenance window (reason stays ok)', () => { + const r = evaluatePolicy({ + ...baseInput, tier: 'auto', maintenanceWindow: null, + }); + expect(r.canAuto).toBe(true); + expect(r.canAutonomous).toBe(false); + expect(r.reason).toBe('ok'); + }); + + it('rollback-failed still wins over the window denial', () => { + const r = evaluatePolicy({ + ...baseInput, tier: 'autonomous', + maintenanceWindow: null, + executionStatus: 'rollback-failed', + }); + expect(r.canAuto).toBe(false); + expect(r.canAutonomous).toBe(false); + expect(r.reason).toBe('rollback-failed-terminal'); + }); +}); diff --git 
a/src/tests/backend-new/specs/updater/applyPipeline.test.ts b/src/tests/backend-new/specs/updater/applyPipeline.test.ts index eb0cb37f2ac..e23e63e4991 100644 --- a/src/tests/backend-new/specs/updater/applyPipeline.test.ts +++ b/src/tests/backend-new/specs/updater/applyPipeline.test.ts @@ -84,6 +84,26 @@ describe('applyUpdate (extracted pipeline)', () => { expect(final.lastResult?.reason).toBe('low-disk-space'); }); + it('preserves the preflight detail in the returned reason (HTTP + email use the return value)', async () => { + // Regression: applyUpdate built `reasonStr = reason: detail` for state + + // logs but returned only `pf.reason`, so /admin/update/apply 409 bodies + // and failure-notify emails lost the engine-mismatch detail. + const {deps, loadState} = baseDeps(); + deps.runPreflight = async () => ({ + ok: false, + reason: 'node-engine-mismatch', + detail: 'target requires Node >=26.0.0, running 25.0.0', + }); + const r = await applyUpdate({targetTag: 'v2.0.1', deps}); + expect(r).toEqual({ + outcome: 'preflight-failed', + reason: 'node-engine-mismatch: target requires Node >=26.0.0, running 25.0.0', + }); + const final = loadState(); + expect(final.lastResult?.reason) + .toBe('node-engine-mismatch: target requires Node >=26.0.0, running 25.0.0'); + }); + it('returns cancelled when the post-preflight state check shows state was reset (admin cancelled mid-preflight)', async () => { const {deps} = baseDeps(); // First preflight pass mutates state to 'preflight'. 
Then the cancel handler diff --git a/src/tests/backend-new/specs/updater/preflight.test.ts b/src/tests/backend-new/specs/updater/preflight.test.ts index 5926c7864bd..8ff425a103f 100644 --- a/src/tests/backend-new/specs/updater/preflight.test.ts +++ b/src/tests/backend-new/specs/updater/preflight.test.ts @@ -10,6 +10,7 @@ const baseDeps = (): PreflightDeps => ({ lockHeld: vi.fn(async () => false), remoteHasTag: vi.fn(async () => true), verifyTag: vi.fn(async (): Promise => ({ok: true, reason: 'signature-not-required'})), + readTargetEnginesNode: vi.fn(async () => null), }); const baseInput = { @@ -17,6 +18,7 @@ const baseInput = { diskSpaceMinMB: 500, requireSignature: false, trustedKeysPath: null as string | null, + currentNodeVersion: '25.0.0', }; describe('runPreflight', () => { @@ -75,4 +77,58 @@ describe('runPreflight', () => { expect(r.ok).toBe(false); expect(deps.remoteHasTag).not.toHaveBeenCalled(); }); + + describe('Node engine check', () => { + it('passes when target has no engines.node', async () => { + const r = await runPreflight(baseInput, { + ...baseDeps(), readTargetEnginesNode: vi.fn(async () => null), + }); + expect(r).toEqual({ok: true}); + }); + + it('passes when current Node satisfies the range', async () => { + const r = await runPreflight(baseInput, { + ...baseDeps(), readTargetEnginesNode: vi.fn(async () => '>=25.0.0'), + }); + expect(r).toEqual({ok: true}); + }); + + it('fails when current Node is below a future floor (e.g. 
node 25 vs >=26)', async () => { + const r = await runPreflight(baseInput, { + ...baseDeps(), readTargetEnginesNode: vi.fn(async () => '>=26.0.0'), + }); + expect(r.ok).toBe(false); + if (!r.ok) { + expect(r.reason).toBe('node-engine-mismatch'); + expect(r.detail).toContain('Node >=26.0.0'); + expect(r.detail).toContain('25.0.0'); + } + }); + + it('handles caret ranges', async () => { + const r = await runPreflight({...baseInput, currentNodeVersion: '24.5.0'}, { + ...baseDeps(), readTargetEnginesNode: vi.fn(async () => '^25.0.0'), + }); + expect(r.ok).toBe(false); + if (!r.ok) expect(r.reason).toBe('node-engine-mismatch'); + }); + + it('handles loose ranges with spaces', async () => { + const r = await runPreflight(baseInput, { + ...baseDeps(), readTargetEnginesNode: vi.fn(async () => '>= 25.0.0'), + }); + expect(r).toEqual({ok: true}); + }); + + it('runs after signature verification (engine check should not gate trust)', async () => { + const readEngines = vi.fn(async () => '>=99.0.0'); + const r = await runPreflight(baseInput, { + ...baseDeps(), + verifyTag: vi.fn(async (): Promise => ({ok: false, reason: 'signature-verification-failed'})), + readTargetEnginesNode: readEngines, + }); + expect(r).toEqual({ok: false, reason: 'signature-verification-failed'}); + expect(readEngines).not.toHaveBeenCalled(); + }); + }); }); diff --git a/src/tests/backend-new/specs/updater/smtpTransportKey.test.ts b/src/tests/backend-new/specs/updater/smtpTransportKey.test.ts new file mode 100644 index 00000000000..cd8dad8046f --- /dev/null +++ b/src/tests/backend-new/specs/updater/smtpTransportKey.test.ts @@ -0,0 +1,41 @@ +import {describe, it, expect} from 'vitest'; +import {smtpTransportKey} from '../../../../node/updater/index'; + +describe('smtpTransportKey', () => { + // Regression for Qodo PR #7753 review: the nodemailer transport cache was + // invalidated only on host change. 
Operators rotating SMTP credentials or + // moving to a different port without changing host would keep using the + // stale transport after reloadSettings(). + + it('differs when port changes', () => { + const base = {host: 'smtp.example.com', port: 587, secure: false, auth: null}; + expect(smtpTransportKey(base)) + .not.toBe(smtpTransportKey({...base, port: 465})); + }); + + it('differs when secure flag changes', () => { + const base = {host: 'smtp.example.com', port: 587, secure: false, auth: null}; + expect(smtpTransportKey(base)) + .not.toBe(smtpTransportKey({...base, secure: true})); + }); + + it('differs when auth changes', () => { + const base = {host: 'smtp.example.com', port: 587, secure: false, + auth: {user: 'a', pass: '1'}}; + expect(smtpTransportKey(base)) + .not.toBe(smtpTransportKey({...base, auth: {user: 'a', pass: '2'}})); + }); + + it('is stable for an unchanged config (cache hit on repeat calls)', () => { + const cfg = {host: 'smtp.example.com', port: 587, secure: false, + auth: {user: 'a', pass: '1'}}; + expect(smtpTransportKey(cfg)).toBe(smtpTransportKey({...cfg})); + }); + + it('falls back to port 587 when port is unset or non-numeric', () => { + expect(smtpTransportKey({host: 'h'})) + .toBe(smtpTransportKey({host: 'h', port: 587})); + expect(smtpTransportKey({host: 'h', port: 'not-a-number' as any})) + .toBe(smtpTransportKey({host: 'h', port: 587})); + }); +}); diff --git a/src/tests/backend/specs/updater-window-integration.ts b/src/tests/backend/specs/updater-window-integration.ts new file mode 100644 index 00000000000..a93875f8da6 --- /dev/null +++ b/src/tests/backend/specs/updater-window-integration.ts @@ -0,0 +1,147 @@ +'use strict'; + +import path from 'node:path'; +import fs from 'node:fs/promises'; +import os from 'node:os'; +import {strict as assert} from 'assert'; +import {EMPTY_STATE, MaintenanceWindow, PolicyResult, ReleaseInfo} from '../../../node/updater/types'; +import {loadState, saveState} from '../../../node/updater/state'; 
+import {decideSchedule, decideTriggerApply} from '../../../node/updater/Scheduler';
+
+const release: ReleaseInfo = {
+  tag: 'v9.9.9',
+  version: '9.9.9',
+  body: '',
+  publishedAt: '2026-05-11T00:00:00.000Z',
+  prerelease: false,
+  htmlUrl: 'https://example.com',
+};
+
+const policyAutonomous: PolicyResult = {
+  canNotify: true, canManual: true, canAuto: true, canAutonomous: true, reason: 'ok',
+};
+
+const window: MaintenanceWindow = {start: '03:00', end: '05:00', tz: 'utc'};
+
+describe('Tier 4 scheduler — maintenance-window boundary integration', function () {
+  this.timeout(15000);
+
+  let root: string;
+  let stateFile: string;
+
+  beforeEach(async () => {
+    root = await fs.mkdtemp(path.join(os.tmpdir(), 'epwindow-'));
+    await fs.mkdir(path.join(root, 'var'), {recursive: true});
+    stateFile = path.join(root, 'var', 'update-state.json');
+  });
+
+  afterEach(async () => { await fs.rm(root, {recursive: true, force: true}); });
+
+  it('outside-window: snap scheduledFor forward to next opening and persist', async () => {
+    const now = new Date('2026-05-11T10:00:00.000Z');
+    const initial = {...EMPTY_STATE, latest: release};
+    await saveState(stateFile, initial);
+
+    const state = await loadState(stateFile);
+    const decision = decideSchedule({
+      state, now, policy: policyAutonomous, latest: release, current: '2.0.0',
+      preApplyGraceMinutes: 1, adminEmail: null, maintenanceWindow: window,
+    });
+    assert.equal(decision.action, 'schedule');
+    if (decision.action !== 'schedule') return;
+    assert.equal(decision.newExecution.scheduledFor, '2026-05-12T03:00:00.000Z');
+
+    await saveState(stateFile, {...state, execution: decision.newExecution});
+    const reloaded = await loadState(stateFile);
+    assert.equal(reloaded.execution.status, 'scheduled');
+    if (reloaded.execution.status !== 'scheduled') return;
+    assert.equal(reloaded.execution.scheduledFor, '2026-05-12T03:00:00.000Z');
+  });
+
+  it('inside-window at fire-time: decideTriggerApply returns fire', async () => {
+    const stateOnDisk = {
+      ...EMPTY_STATE,
+      latest: release,
+      execution: {
+        status: 'scheduled' as const, targetTag: release.tag,
+        scheduledFor: '2026-05-12T03:00:00.000Z',
+        startedAt: '2026-05-11T10:00:00.000Z',
+      },
+    };
+    await saveState(stateFile, stateOnDisk);
+    const state = await loadState(stateFile);
+
+    const decision = decideTriggerApply({
+      state, targetTag: release.tag, policy: policyAutonomous,
+      now: new Date('2026-05-12T03:30:00.000Z'), maintenanceWindow: window,
+    });
+    assert.deepEqual(decision, {action: 'fire'});
+  });
+
+  it('window-closes-mid-grace: defer carries a new nextStart and persists', async () => {
+    const stateOnDisk = {
+      ...EMPTY_STATE,
+      latest: release,
+      execution: {
+        status: 'scheduled' as const, targetTag: release.tag,
+        scheduledFor: '2026-05-12T03:01:00.000Z',
+        startedAt: '2026-05-11T10:00:00.000Z',
+      },
+    };
+    await saveState(stateFile, stateOnDisk);
+    const state = await loadState(stateFile);
+
+    const fireTimeOutsideWindow = new Date('2026-05-12T06:00:00.000Z');
+    const decision = decideTriggerApply({
+      state, targetTag: release.tag, policy: policyAutonomous,
+      now: fireTimeOutsideWindow, maintenanceWindow: window,
+    });
+    assert.equal(decision.action, 'defer');
+    if (decision.action !== 'defer') return;
+    assert.equal(decision.nextStart, '2026-05-13T03:00:00.000Z');
+    assert.equal(decision.reason, 'outside-maintenance-window');
+
+    // Runner-level behavior: persist the new scheduledFor (fail loudly if not scheduled).
+    if (state.execution.status !== 'scheduled') throw new Error('loaded state is not scheduled');
+    await saveState(stateFile, {
+      ...state,
+      execution: {...state.execution, scheduledFor: decision.nextStart},
+    });
+    const reloaded = await loadState(stateFile);
+    if (reloaded.execution.status !== 'scheduled') throw new Error('reloaded state is not scheduled');
+    assert.equal(reloaded.execution.scheduledFor, '2026-05-13T03:00:00.000Z');
+  });
+
+  it('cancel during deferred-grace: state returns to idle', async () => {
+    const stateOnDisk = {
+      ...EMPTY_STATE,
+      latest: release,
+      execution: {
+        status: 'scheduled' as const, targetTag: release.tag,
+        scheduledFor: '2026-05-12T03:00:00.000Z',
+        startedAt: '2026-05-11T10:00:00.000Z',
+      },
+    };
+    await saveState(stateFile, stateOnDisk);
+
+    // Cancel happens via /admin/update/cancel; here we simulate the state
+    // transition the handler performs.
+    const state = await loadState(stateFile);
+    await saveState(stateFile, {...state, execution: {status: 'idle'}});
+
+    const reloaded = await loadState(stateFile);
+    assert.equal(reloaded.execution.status, 'idle');
+
+    // After cancel, the next periodic check would re-schedule (correct
+    // behavior — tier flip is the way to opt out). decideSchedule on the
+    // cancelled state should re-emit a schedule snapped to the next window.
+    const decision = decideSchedule({
+      state: reloaded, now: new Date('2026-05-12T06:00:00.000Z'),
+      policy: policyAutonomous, latest: release, current: '2.0.0',
+      preApplyGraceMinutes: 0, adminEmail: null, maintenanceWindow: window,
+    });
+    assert.equal(decision.action, 'schedule');
+    if (decision.action !== 'schedule') return;
+    assert.equal(decision.newExecution.scheduledFor, '2026-05-13T03:00:00.000Z');
+  });
+});