Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 26 additions & 2 deletions src/node/db/API.ts
Original file line number Diff line number Diff line change
Expand Up @@ -19,10 +19,15 @@
* limitations under the License.
*/

import AttributeMap from '../../static/js/AttributeMap';
import {deserializeOps} from '../../static/js/Changeset';
import ChatMessage from '../../static/js/ChatMessage';
import {Builder} from "../../static/js/Builder";
import {Attribute} from "../../static/js/types/Attribute";

// Mirror of `Pad.SYSTEM_AUTHOR_ID`. Inlined to avoid a circular load
// (API <-> Pad) at module init time.
const SYSTEM_AUTHOR_ID = 'a.etherpad-system';
import settings from '../utils/Settings';
const CustomError = require('../utils/customError');
const padManager = require('./PadManager');
Expand Down Expand Up @@ -620,9 +625,28 @@ exports.restoreRevision = async (padID: string, rev: number, authorId = '') => {
// create a new changeset with a helper builder object
const builder = new Builder(oldText.length);

// The author to attribute inserts to. If the caller supplied an
// explicit authorId, that wins; otherwise fall back to the stable
// system author. The replayed atext was built from historical
// revisions that may legitimately have insert ops without an
// author attribute (legacy server-internal flows / .etherpad
// imports); appendRevision now requires every insert to carry
// one, so we merge the marker in below.
const replayAuthorId = authorId || SYSTEM_AUTHOR_ID;

// assemble each line into the builder
eachAttribRun(atext.attribs, (start: number, end: number, attribs:Attribute[]) => {
builder.insert(atext.text.substring(start, end), attribs);
eachAttribRun(atext.attribs, (start: number, end: number, attribs:string) => {
// attribs here is the op.attribs *string* (the eachAttribRun
// callback receives it as the third arg). Use AttributeMap to
// merge in `author` while preserving canonical (sorted) order
// so checkRep doesn't reject the result. The `.set` call is a
// no-op when the existing attribs already contain an `author`
// attribute that matches; when they contain a *different*
// author it preserves the historical attribution (we only
// set author when it's missing).
const map = AttributeMap.fromString(attribs, pad.pool);
if (!map.get('author')) map.set('author', replayAuthorId);
builder.insert(atext.text.substring(start, end), map.toString());
});

const lastNewlinePos = oldText.lastIndexOf('\n');
Expand Down
97 changes: 94 additions & 3 deletions src/node/db/Pad.ts
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,58 @@ class Pad {
*/
static readonly SYSTEM_AUTHOR_ID = 'a.etherpad-system';

/**
* Validate that every `+` (insert) op in `aChangeset` carries an
* `author` attribute that resolves through `pool`. Callers that have
* already rebased onto pad.pool pass the post-rebase changeset, so
* we accept the pad's own pool here.
*
* Throws an Error if any insert op is missing an author attribute,
* carries an empty author, or references an attribute number that
* is not present in the pool.
*
* Tolerates `=` and `-` ops with empty attribs (those are the
* canonical form for keeps/deletes that don't change attribution).
* Also tolerates pure-newline `+` ops: the client's line assembler
* handles those regardless of attribs, and the API restoreRevision
* path emits them at line boundaries.
*/
private static _assertInsertOpsCarryAuthor(aChangeset: string, pool: AttributePool) {
let unpacked;
try {
unpacked = unpack(aChangeset);
} catch (e: any) {
// unpack already throws a descriptive error; rethrow as-is so the
// caller's failure mode stays the same.
throw e;
}
for (const op of deserializeOps(unpacked.ops)) {
if (op.opcode !== '+') continue;
// Pure-newline inserts (e.g. `|1+1` for a single line break) are
// tolerated — the client's line assembler handles them regardless
// of attribs, and the API restoreRevision path emits them at
// line boundaries.
if (op.lines > 0 && op.chars === op.lines) continue;
if (!op.attribs) {
throw new Error(
'insert op without an author attribute ' +
`(empty attribs): ${aChangeset}`);
}
let authorIdSeen: string | undefined;
try {
authorIdSeen = AttributeMap.fromString(op.attribs, pool).get('author');
} catch (e: any) {
throw new Error(
'insert op references an attribute number ' +
`not present in the pool: ${aChangeset} (${e && e.message || e})`);
}
if (!authorIdSeen) {
throw new Error(
'insert op without an author attribute: ' + aChangeset);
}
}
}

private db: Database;
private atext: AText;
private pool: AttributePool;
Expand Down Expand Up @@ -226,6 +278,13 @@ class Pad {
* @return {Promise<number|string>}
*/
async appendRevision(aChangeset:string, authorId = '') {
// Centralised "every insert op carries an author attribute"
// invariant. The socket handler enforces the same rule at the wire
// boundary; checking here covers the non-wire callers (HTTP API
// setHTML/setText/restoreRevision, plugin paths that call
// appendRevision directly).
Pad._assertInsertOpsCarryAuthor(aChangeset, this.pool);

Comment on lines +281 to +287
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Action required

1. Legacy restore/copy breaks 🐞 Bug ≡ Correctness

Pad.appendRevision now throws if any insert op lacks an author attribute, but restoreRevision()
and copyPadWithoutHistory() rebuild changesets from stored attrib strings that can legitimately omit
author on legacy pads (notably .etherpad imports and older server-internal flows). Those APIs can
therefore start failing at runtime for existing pads with historical unattributed inserts.
Agent Prompt
## Issue description
`Pad.appendRevision()` now enforces that every `+` op has an `author` attribute, but some existing stored pads (legacy `.etherpad` imports and historical server-internal flows) can contain unattributed insert ops. Code paths that *replay* stored content as new changesets (notably `API.restoreRevision()` and `Pad.copyPadWithoutHistory()`) will start throwing when they encounter such legacy attribution.

## Issue Context
- `Pad.appendRevision()` now calls `_assertInsertOpsCarryAuthor()`.
- `pad.check()` explicitly documents that historical stored data can violate this invariant.
- `API.restoreRevision()` inserts text using historical `op.attribs` verbatim.
- `Pad.copyPadWithoutHistory()` packs ops from `opsFromAText(oldAText)` which can also propagate missing authors.

## Fix Focus Areas
- src/node/db/API.ts[587-640]
- src/node/db/Pad.ts[280-334]
- src/node/db/Pad.ts[714-753]

### Implementation sketch
1. In `restoreRevision()`: when iterating over `atext.attribs`, detect inserts whose attrib string lacks `author`. Merge in an author attribute (prefer `authorId` if provided, else `Pad.SYSTEM_AUTHOR_ID`) using `AttributeMap.fromString(..., pad.pool)` and `.set('author', ...)` to keep canonical order.
2. In `copyPadWithoutHistory()`: before calling `dstPad.appendRevision(...)`, ensure the packed ops string has `author` on every insert op (same merge strategy; prefer passed `authorId` or system author for missing segments).
3. Add a regression test that restores/copies a pad containing a stored revision with an insert op missing `author` (simulating legacy data) and assert the operation succeeds and produces canonical attribs.

ⓘ Copy this prompt and use it to remediate the issue with your preferred AI generation tools

const newAText = applyToAText(aChangeset, this.atext, this.pool);
if (newAText.text === this.atext.text && newAText.attribs === this.atext.attribs &&
this.head !== -1) {
Expand Down Expand Up @@ -537,9 +596,19 @@ class Pad {
if (context.type !== 'text') throw new Error(`unsupported content type: ${context.type}`);
text = exports.cleanText(context.content);
}
const firstAttribs = authorId ? [['author', authorId] as [string, string]] : undefined;
// When the initial pad text is non-empty but no authorId was
// supplied (internal getPad calls during HTTP API setup,
// padDefaultContent flows, plugin-driven pad creation), fall back
// to the stable system author so the initial changeset's insert
// op carries an `author` attribute. Mirrors the same substitution
// setText/appendText already do via spliceText.
const effectiveAuthorId =
(text.length > 0 && !authorId) ? Pad.SYSTEM_AUTHOR_ID : authorId;
const firstAttribs = effectiveAuthorId
? [['author', effectiveAuthorId] as [string, string]]
: undefined;
const firstChangeset = makeSplice('\n', 0, 0, text, firstAttribs, this.pool);
await this.appendRevision(firstChangeset, authorId);
await this.appendRevision(firstChangeset, effectiveAuthorId);
}
this.padSettings = Pad.normalizePadSettings(this.padSettings);
await hooks.aCallAll('padLoad', {pad: this});
Expand Down Expand Up @@ -665,9 +734,25 @@ class Pad {

const oldAText = this.atext;

// The author to attribute inserts to when the historical op lacks
// one (legacy server-internal flows / .etherpad imports). Caller-
// supplied authorId wins; otherwise the stable system author.
// appendRevision now requires every insert to carry an author, so
// unattributed ops in the source pad would otherwise throw here.
const replayAuthorId = authorId || Pad.SYSTEM_AUTHOR_ID;

// based on Changeset.makeSplice
const assem = new SmartOpAssembler();
for (const op of opsFromAText(oldAText)) assem.append(op);
for (const op of opsFromAText(oldAText)) {
if (op.opcode === '+') {
const map = AttributeMap.fromString(op.attribs, dstPad.pool);
if (!map.get('author')) {
map.set('author', replayAuthorId);
op.attribs = map.toString();
}
}
assem.append(op);
}
assem.endDocument();

// although we have instantiated the dstPad with '\n', an additional '\n' is
Expand Down Expand Up @@ -867,6 +952,12 @@ class Pad {
assert(changeset != null);
assert.equal(typeof changeset, 'string');
checkRep(changeset);
// NOTE: pad.check() intentionally does not invoke
// _assertInsertOpsCarryAuthor — it runs against historical
// stored data (including legacy .etherpad files) where some
// server-internal flows did not previously substitute the
// system author. The write-time guard in appendRevision is
// where the invariant is enforced for new content.
const unpacked = unpack(changeset);
let text = atext.text;
for (const op of deserializeOps(unpacked.ops)) {
Expand Down
42 changes: 28 additions & 14 deletions src/node/handler/APIHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
*/

import {MapArrayType} from "../types/MapType";
import { jwtDecode } from "jwt-decode";
const api = require('../db/API');
const padManager = require('../db/PadManager');
import settings from '../utils/Settings';
Expand All @@ -29,6 +28,7 @@ import {Http2ServerRequest} from "node:http2";
import {publicKeyExported} from "../security/OAuth2Provider";
import {jwtVerify} from "jose";
import {APIFields, apikey} from './APIKeyHandler'
import crypto from 'node:crypto';
// a list of all functions
const version:MapArrayType<any> = {};

Expand Down Expand Up @@ -179,27 +179,41 @@ exports.handle = async function (apiVersion: string, functionName: string, field

if (apikey !== null && apikey.trim().length > 0) {
fields.apikey = fields.apikey || fields.api_key || fields.authorization;
// API key is configured, check if it is valid
if (fields.apikey !== apikey!.trim()) {
// Constant-time compare — see crypto.timingSafeEqual docs.
const provided = Buffer.from(String(fields.apikey ?? ''), 'utf8');
const want = Buffer.from(apikey!.trim(), 'utf8');
const ok = provided.length === want.length &&
crypto.timingSafeEqual(provided, want);
if (!ok) {
throw new createHTTPError.Unauthorized('no or wrong API Key');
}
} else {
if(!req.headers.authorization) {
if (!req.headers.authorization) {
throw new createHTTPError.Unauthorized('no or wrong API Key');
}
try {
const clientIds: string[] = settings.sso.clients?.map((client: {client_id: string}) => client.client_id) ?? [];
const jwtToCheck = req.headers.authorization.replace("Bearer ", "")
const payload = jwtDecode(jwtToCheck)
// client_credentials
if (clientIds.includes(<string>payload.sub)) {
await jwtVerify(jwtToCheck, publicKeyExported!, {algorithms: ['RS256']})
} else {
// authorization_code
await jwtVerify(jwtToCheck, publicKeyExported!, {algorithms: ['RS256'],
requiredClaims: ["admin"]})
const clientIds: string[] = settings.sso.clients?.map(
(client: {client_id: string}) => client.client_id) ?? [];
const jwtToCheck = req.headers.authorization.replace('Bearer ', '');
// Verify the JWT signature first, then read claims off the verified
// payload only.
const {payload: verified} = await jwtVerify(
jwtToCheck, publicKeyExported!, {algorithms: ['RS256']});
const isClientCredentials =
clientIds.includes(verified.sub as string);
if (!isClientCredentials) {
// authorization_code branch: require the admin claim to be
// strictly true. Checking only that the claim is present is not
// sufficient — the provider issues it as `admin: is_admin`, so
// non-admin users would have it set to false.
if (verified.admin !== true) {
throw new createHTTPError.Unauthorized(
'admin claim missing or not true');
}
}
} catch (e) {
// Single error string regardless of the underlying failure so we
// don't leak which check rejected the token.
throw new createHTTPError.Unauthorized('no or wrong OAuth token');
}
}
Expand Down
6 changes: 4 additions & 2 deletions src/node/handler/ExportHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
const exporthtml = require('../utils/ExportHtml');
const exporttxt = require('../utils/ExportTxt');
const exportEtherpad = require('../utils/ExportEtherpad');
import crypto from 'node:crypto';
import fs from 'fs';
import settings from '../utils/Settings';
import os from 'os';
Expand Down Expand Up @@ -155,8 +156,9 @@ exports.doExport = async (req: any, res: any, padId: string, readOnlyId: string,
}
}

// soffice path — write the html export to a file
const randNum = Math.floor(Math.random() * 0xFFFFFFFF);
// soffice path — write the html export to a file. Use CSPRNG output
// for the temp path token (see matching note in ImportHandler.ts).
const randNum = crypto.randomBytes(16).toString('hex');
const srcFile = `${tempDirectory}/etherpad_export_${randNum}.html`;
await fsp_writeFile(srcFile, html);

Expand Down
6 changes: 5 additions & 1 deletion src/node/handler/ImportHandler.ts
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@

const padManager = require('../db/PadManager');
const padMessageHandler = require('./PadMessageHandler');
import crypto from 'node:crypto';
import {promises as fs} from 'fs';
import path from 'path';
import settings from '../utils/Settings';
Expand Down Expand Up @@ -83,7 +84,10 @@ const doImport = async (req:any, res:any, padId:string, authorId:string) => {
// pipe to a file
// convert file to html via soffice
// set html in the pad
const randNum = Math.floor(Math.random() * 0xFFFFFFFF);
//
// Use CSPRNG output for the temp path token so the destination path
// can't be predicted by another process on the same host.
const randNum = crypto.randomBytes(16).toString('hex');

// setting flag for whether to use converter or not
let useConverter = (converter != null);
Expand Down
15 changes: 12 additions & 3 deletions src/node/hooks/express/admin.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import fs from "fs";
import {MapArrayType} from "../../types/MapType";

import settings from 'ep_etherpad-lite/node/utils/Settings';
import {sanitizeProxyPath} from '../../utils/sanitizeProxyPath';

const ADMIN_PATH = path.join(settings.root, 'src', 'templates');
const PROXY_HEADER = "x-proxy-path"
Expand Down Expand Up @@ -72,11 +73,19 @@ exports.expressCreateServer = (hookName: string, args: ArgsExpressType, cb: Func
// if the file is found, set Content-type and send data
res.setHeader('Content-type', map[ext] || 'text/plain');
if (ext === ".html" || ext === ".js" || ext === ".css") {
if (req.header(PROXY_HEADER)) {
// The proxy-path header is woven into the response body, so
// it must be sanitised before substitution and downstream
// caches must not collapse responses across different
// header values.
const proxyPath = sanitizeProxyPath(req);
if (proxyPath) {
let string = data.toString()
dataToSend = string.replaceAll("/admin", req.header(PROXY_HEADER) + "/admin")
dataToSend = dataToSend.replaceAll("/socket.io", req.header(PROXY_HEADER) + "/socket.io")
dataToSend = string.replaceAll("/admin", proxyPath + "/admin")
dataToSend = dataToSend.replaceAll(
"/socket.io", proxyPath + "/socket.io")
}
res.setHeader('Vary', 'x-proxy-path');
res.setHeader('Cache-Control', 'private, no-store');
}
res.end(dataToSend);
}
Expand Down
10 changes: 4 additions & 6 deletions src/node/hooks/express/specialpages.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,10 @@ import prometheus from "../../prometheus";

let ioI: { sockets: { sockets: any[]; }; } | null = null

// Sanitize x-proxy-path header to prevent XSS via header injection.
// Only allow path-like characters (letters, digits, hyphens, underscores, slashes, dots).
const sanitizeProxyPath = (req: any): string => {
const raw = req.header('x-proxy-path') || '';
return raw.replace(/[^a-zA-Z0-9\-_\/\.]/g, '');
};
// Shared sanitizer for the `x-proxy-path` header. See the helper for the
// allowed character class and the protocol-relative / traversal rejection
// rules. Reused by admin.ts so both call sites share one definition.
import {sanitizeProxyPath} from '../../utils/sanitizeProxyPath';


exports.socketio = (hookName: string, {io}: any) => {
Expand Down
Loading
Loading