From 63a00c9ffc12dff95e1af27d0d959f96b0afdbfe Mon Sep 17 00:00:00 2001 From: shunkica Date: Tue, 24 Jun 2025 14:15:30 +0200 Subject: [PATCH 01/11] wip: add support for C14N --- binding/exported-functions.txt | 9 + binding/exported-runtime-functions.txt | 1 + src/c14n.mts | 368 +++++++++++++++++++++++++ src/document.mts | 39 +++ src/libxml2.mts | 94 ++++++- src/libxml2raw.d.mts | 31 +++ src/nodes.mts | 2 +- src/utils.mts | 92 ++++--- test/crossplatform/c14n.spec.mts | 167 +++++++++++ 9 files changed, 770 insertions(+), 33 deletions(-) create mode 100644 src/c14n.mts create mode 100644 test/crossplatform/c14n.spec.mts diff --git a/binding/exported-functions.txt b/binding/exported-functions.txt index 900a6c5..1aa720e 100644 --- a/binding/exported-functions.txt +++ b/binding/exported-functions.txt @@ -4,12 +4,14 @@ _xmlAddChild _xmlAddNextSibling _xmlAddPrevSibling _xmlCleanupInputCallbacks +_xmlCopyNode _xmlCtxtParseDtd _xmlCtxtReadMemory _xmlCtxtSetErrorHandler _xmlCtxtValidateDtd _xmlDocGetRootElement _xmlDocSetRootElement +_xmlFree _xmlFreeDoc _xmlFreeDtd _xmlFreeNode @@ -74,3 +76,10 @@ _xmlXPathFreeObject _xmlXPathNewContext _xmlXPathRegisterNs _xmlXPathSetContextNode +_xmlC14NDocDumpMemory +_xmlC14NExecute +_xmlBufferCreate +_xmlOutputBufferCreateBuffer +_xmlBufferContent +_xmlOutputBufferClose +_xmlBufferFree diff --git a/binding/exported-runtime-functions.txt b/binding/exported-runtime-functions.txt index 7d758a2..0d995f3 100644 --- a/binding/exported-runtime-functions.txt +++ b/binding/exported-runtime-functions.txt @@ -3,5 +3,6 @@ HEAPU8 UTF8ToString addFunction getValue +setValue lengthBytesUTF8 stringToUTF8 diff --git a/src/c14n.mts b/src/c14n.mts new file mode 100644 index 0000000..757d046 --- /dev/null +++ b/src/c14n.mts @@ -0,0 +1,368 @@ +import { + addFunction, + getValue, DisposableMalloc, + UTF8ToString, xmlC14NDocDumpMemory, xmlC14NExecute, xmlCopyNode, + xmlDocSetRootElement, + XmlError, + xmlFree, + xmlFreeDoc, + xmlNewDoc, + xmlNewNs, + XmlOutputBufferHandler, DisposableXmlOutputBuffer, ContextStorage, +} from './libxml2.mjs'; +import { createNode, XmlElement, type XmlNode } from './nodes.mjs'; +import type { XmlDocPtr } from './libxml2raw.mjs'; +import { + CStringArrayWrapper, XmlNodeSetWrapper, +} from './utils.mjs'; +import { Pointer } from './libxml2raw.mjs'; + +export const XmlC14NMode = { + XML_C14N_1_0: 0, + XML_C14N_EXCLUSIVE_1_0: 1, + XML_C14N_1_1: 2, +} as const; + +export type C14NOptionsBase = { + /** The canonicalization mode to use + * @see {@link XmlC14NMode} + */ + mode: typeof XmlC14NMode[keyof typeof XmlC14NMode]; + /** The list of inclusive namespace prefixes (only for exclusive canonicalization) */ + inclusiveNamespacePrefixList?: string[]; + /** Whether to include comments in the canonicalized output + * @default false + */ + withComments?: boolean; +}; + +export type C14NOptionsDocument = C14NOptionsBase & { + node?: never; + nodeSet?: never; + isVisibleCallback?: never; + userData?: never; +}; + +export type C14NOptionsNode = C14NOptionsBase & { + node: XmlNode; + nodeSet?: never; + isVisibleCallback?: never; + userData?: never; +}; + +export type C14NOptionsNodeSet = C14NOptionsBase & { + nodeSet: XmlNode[]; + node?: never; + isVisibleCallback?: never; + userData?: never; +}; + +export type C14NOptionsCallback = C14NOptionsBase & { + node?: never; + nodeSet?: never; + isVisibleCallback: XmlC14NIsVisibleCallback; + userData?: T; +}; + +export type C14NOptions = + C14NOptionsDocument | C14NOptionsNode | C14NOptionsNodeSet | C14NOptionsCallback; + +/** + * Decide if a node should be included in the canonicalization. + */ +export type XmlC14NIsVisibleCallback = (userData: T, node: XmlNode, parent: XmlNode) => boolean; + +/** + * wrap the users is visible function + */ +export function getC14NIsVisibleCallback( + cb: XmlC14NIsVisibleCallback, + contextStorage: ContextStorage | null, +): Pointer { + const wrapper = (userDataPtr: number, nodePtr: number, parentPtr: number): number => { + const node = createNode(nodePtr); + const parent = createNode(parentPtr); + const userDataObj = contextStorage ? contextStorage.get(userDataPtr) : undefined; + return cb(userDataObj as T, node, parent) ? 1 : 0; + }; + const funcPtr = addFunction(wrapper, 'iiii'); + return funcPtr as Pointer; +} + +/** + * Canonicalize an XML document with a specific node + */ +export function canonicalizeWithNode( + docPtr: XmlDocPtr, + handler: XmlOutputBufferHandler, + options: C14NOptionsNode, +): void { + using docTxtMem = new DisposableMalloc(4); + let tempDoc: number | null = null; + let prefixArray: CStringArrayWrapper | null = null; + + try { + // If inclusiveNamespaces is provided + if (options.inclusiveNamespacePrefixList) { + prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixList); + } + + // Create a temporary document for the subtree + tempDoc = xmlNewDoc(); + if (!tempDoc) { + throw new XmlError('Failed to create new document for subtree'); + } + + // Make a deep copy of the node (1 = recursive copy) + const copiedNode = xmlCopyNode(options.node._nodePtr, 1); + if (!copiedNode) { + throw new XmlError('Failed to copy subtree node'); + } + + // Set the copied node as the root element of the new document + xmlDocSetRootElement(tempDoc, copiedNode); + + // If inclusiveNamespaces is provided, + // we need to add the namespace declarations to the root element + const inclusivePrefixes = options.inclusiveNamespacePrefixList; + if (inclusivePrefixes) { + let currentNode: XmlElement | null = options.node.parent; + while (currentNode) { + Object.entries(currentNode.nsDeclarations).forEach( + ([prefix, namespaceURI]) => { + if (inclusivePrefixes.includes(prefix)) { + const namespace = xmlNewNs(copiedNode, namespaceURI, prefix); + if (!namespace) { + throw new XmlError(`Failed to add namespace declaration "${prefix}"`); + } + } + }, + ); + currentNode = currentNode.parent; + } + } + + const mode = options.mode ?? XmlC14NMode.XML_C14N_1_0; + const withComments = options.withComments ? 1 : 0; + + const result = xmlC14NDocDumpMemory( + tempDoc, + 0, // no nodeSet for single node + mode, + prefixArray ? prefixArray._ptr : 0, + withComments, + docTxtMem._ptr, + ); + + if (result < 0) { + throw new XmlError('Failed to canonicalize XML subtree'); + } + + const txtPtr = getValue(docTxtMem._ptr, 'i32'); + if (!txtPtr) throw new XmlError('Failed to get canonicalized XML'); + + const canonicalXml = UTF8ToString(txtPtr, result); + const buffer = new TextEncoder().encode(canonicalXml); + handler.write(buffer); + + xmlFree(txtPtr); + } finally { + if (tempDoc) { + xmlFreeDoc(tempDoc); + } + if (prefixArray) { + prefixArray.dispose(); + } + } +} + +/** + * Canonicalize an XML document with a node set + * + * TODO: I can't figure out how to add namespace nodes to the node set. + * (Error: Unsupported node type 18) + */ +export function canonicalizeWithNodeSet( + docPtr: XmlDocPtr, + handler: XmlOutputBufferHandler, + options: C14NOptionsNodeSet, +): void { + using docTxtPtr = new DisposableMalloc(4); + let prefixArray: CStringArrayWrapper | null = null; + let nodeSet: XmlNodeSetWrapper | null = null; + + try { + // If inclusiveNamespaces is provided + if (options.inclusiveNamespacePrefixList) { + prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixList); + } + + // Create nodeSet wrapper + nodeSet = new XmlNodeSetWrapper(options.nodeSet.map((item) => item._nodePtr)); + + const mode = options.mode ?? XmlC14NMode.XML_C14N_1_0; + const withComments = options.withComments ? 1 : 0; + + const result = xmlC14NDocDumpMemory( + docPtr, + nodeSet._ptr, + mode, + prefixArray ? prefixArray._ptr : 0, + withComments, + docTxtPtr._ptr, + ); + + if (result < 0) { + throw new XmlError('Failed to canonicalize XML with node set'); + } + + const txtPtr = getValue(docTxtPtr._ptr, 'i32'); + if (!txtPtr) throw new XmlError('Failed to get canonicalized XML'); + + const canonicalXml = UTF8ToString(txtPtr, result); + const buffer = new TextEncoder().encode(canonicalXml); + handler.write(buffer); + + xmlFree(txtPtr); + } finally { + if (prefixArray) { + prefixArray.dispose(); + } + if (nodeSet) { + nodeSet.dispose(); + } + } +} + +/** + * Canonicalize an XML document with a callback + */ +export function canonicalizeWithCallback( + docPtr: XmlDocPtr, + handler: XmlOutputBufferHandler, + options: C14NOptionsCallback, +): void { + using outputBuffer = new DisposableXmlOutputBuffer(); + let prefixArray: CStringArrayWrapper | null = null; + let contextStorage: ContextStorage | null = null; + let callbackPtr: Pointer | null = null; + let userDataPtr = 0; + + try { + // If inclusiveNamespaces is provided + if (options.inclusiveNamespacePrefixList) { + prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixList); + } + + // Set up callback and user data + if (options.userData !== undefined) { + contextStorage = new ContextStorage(); + userDataPtr = contextStorage.allocate(options.userData); + } + + callbackPtr = getC14NIsVisibleCallback(options.isVisibleCallback, contextStorage); + + const withComments = options.withComments ? 1 : 0; + + const result = xmlC14NExecute( + docPtr, + callbackPtr, + userDataPtr, + options.mode, + prefixArray ? prefixArray._ptr : 0, + withComments, + outputBuffer.getOutputBufferPtr(), + ); + + if (result < 0) { + throw new XmlError('Failed to canonicalize XML with callback'); + } + + const caninicalizedXml = outputBuffer.getContent(); + + // TODO: handle this better + handler.write(Buffer.from(caninicalizedXml)); + } finally { + if (prefixArray) { + prefixArray.dispose(); + } + if (contextStorage) { + contextStorage.free(userDataPtr); + } + } +} + +/** + * Canonicalize an XML document (default mode - entire document) + */ +export function canonicalizeDocument( + docPtr: XmlDocPtr, + handler: XmlOutputBufferHandler, + options?: C14NOptionsBase, +): void { + using docTxtPtr = new DisposableMalloc(4); + let prefixArray: CStringArrayWrapper | null = null; + + try { + // If inclusiveNamespaces is provided + if (options && options.inclusiveNamespacePrefixList) { + prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixList); + } + + const mode = options && options.mode ? options.mode : XmlC14NMode.XML_C14N_1_0; + const withComments = options && options.withComments ? 1 : 0; + + const result = xmlC14NDocDumpMemory( + docPtr, + 0, // no nodeSet + mode, + prefixArray ? prefixArray._ptr : 0, + withComments, + docTxtPtr._ptr, + ); + + if (result < 0) { + throw new XmlError('Failed to canonicalize XML'); + } + + const txtPtr = getValue(docTxtPtr._ptr, 'i32'); + if (!txtPtr) throw new XmlError('Failed to get canonicalized XML'); + + const canonicalXml = UTF8ToString(txtPtr, result); + const buffer = new TextEncoder().encode(canonicalXml); + handler.write(buffer); + + xmlFree(txtPtr); + } finally { + if (prefixArray) { + prefixArray.dispose(); + } + } +} + +// export function onlyATest(): string { +// const xmlString = 'text'; +// const doc = XmlDocument.fromString(xmlString); +// +// const buf = xmlBufferCreate(); +// const bufbuf = xmlOutputBufferCreateBuffer(buf, 0); +// +// const canonical = xmlC14NExecute( +// doc._ptr, +// 0, +// 0, +// 0, +// 0, +// 0, +// bufbuf, +// ); +// const errPtr = xmlGetLastError(); +// if (errPtr) { +// const code = getValue(errPtr + 16, 'i32'); // offset depends on struct layout +// const msgPtr = getValue(errPtr + 8, '*'); // check xmlError struct in libxml2 +// const msg = UTF8ToString(msgPtr); +// console.error('C14N error:', code, msg); +// } +// +// return canonical.toString(); +// } diff --git a/src/document.mts b/src/document.mts index 0d481c4..1412ecc 100644 --- a/src/document.mts +++ b/src/document.mts @@ -32,6 +32,13 @@ import type { XmlDocPtr, XmlParserCtxtPtr } from './libxml2raw.mjs'; import { disposeBy, XmlDisposable } from './disposable.mjs'; import { XmlDtd } from './dtd.mjs'; import { XmlStringOutputBufferHandler } from './utils.mjs'; +import { + canonicalizeDocument, + canonicalizeWithCallback, + canonicalizeWithNode, + canonicalizeWithNodeSet, + type C14NOptions, +} from './c14n.mjs'; export enum ParseOption { XML_PARSE_DEFAULT = 0, @@ -494,4 +501,36 @@ export class XmlDocument extends XmlDisposable { xmlXIncludeFreeContext(xinc); } } + + /** + * Canonicalize the XML document to a buffer and invoke the callbacks to process. + * @param handler handlers to process the content in the buffer + * @param options Canonicalization options + * @see {@link toCanonicalString} + */ + canonicalize(handler: XmlOutputBufferHandler, options?: C14NOptions): void { + if (!options) { + canonicalizeDocument(this._ptr, handler); + } else if (options.node) { + canonicalizeWithNode(this._ptr, handler, options); + } else if (options.nodeSet) { + canonicalizeWithNodeSet(this._ptr, handler, options); + } else if (options.isVisibleCallback) { + canonicalizeWithCallback(this._ptr, handler, options); + } else { + canonicalizeDocument(this._ptr, handler, options); + } + } + + /** + * Canonicalize the XML document and return the result as a string. + * @param options Canonicalization options + * @returns The canonicalized XML as a string + * @see {@link canonicalize} + */ + toCanonicalString(options?: C14NOptions): string { + const handler = new XmlStringOutputBufferHandler(); + this.canonicalize(handler, options); + return handler.result; + } } diff --git a/src/libxml2.mts b/src/libxml2.mts index e1dea8c..669816e 100644 --- a/src/libxml2.mts +++ b/src/libxml2.mts @@ -13,11 +13,46 @@ import type { XmlXPathContextPtr, } from './libxml2raw.mjs'; import moduleLoader from './libxml2raw.mjs'; -import { ContextStorage } from './utils.mjs'; +import { disposeBy, XmlDisposable } from './disposable.mjs'; const libxml2 = await moduleLoader(); libxml2._xmlInitParser(); +// Export specific functions needed by other modules +export const { + getValue, setValue, UTF8ToString, lengthBytesUTF8, stringToUTF8, addFunction, +} = libxml2; + +/** + * Manage JS context object for wasm. + * + * In libxml2, a registration of callback often has a context/userdata pointer. + * But when it is in wasm, this pointer is essentially an integer. + * + * To support JS object as context/userdata, we store it in the map and access with an integer key. + * This key could be passed to the registration. + * And the callback use this key to retrieve the real object. + */ +export class ContextStorage { + private storage: Map = new Map(); + + private index = 0; + + allocate(value: T): number { + this.index += 1; + this.storage.set(this.index, value); + return this.index; + } + + free(index: number) { + this.storage.delete(index); + } + + get(index: number): T { + return this.storage.get(index)!; + } +} + /** * The base class for exceptions in this library. * @@ -618,13 +653,68 @@ export function xmlSaveSetIndentString( return withStringUTF8(indent, (buf) => libxml2._xmlSaveSetIndentString(ctxt, buf)); } +/** + * We probably don't want to expose malloc/free directly? + */ +@disposeBy(libxml2._free) +export class DisposableMalloc extends XmlDisposable { + constructor(size: number) { + super(libxml2._malloc(size)); + } +} + +/** + * Maybe also don't expose xmlBuffer* functions directly? + * Don't reuse this buffer. + */ +@disposeBy(libxml2._xmlBufferFree) +export class DisposableXmlOutputBuffer extends XmlDisposable { + private _content: string | null = null; + + private _outputBufferPtr: number; + + constructor() { + super(libxml2._xmlBufferCreate()); + this._outputBufferPtr = libxml2._xmlOutputBufferCreateBuffer(this._ptr, 0); + } + + getOutputBufferPtr(): Pointer { + return this._outputBufferPtr; + } + + // closes the buffer and gets is content as string. + getContent(): string { + if (this._content) { + return this._content; + } + if (this._outputBufferPtr === 0) { + throw new XmlError('Output buffer has been closed'); + } + libxml2._xmlOutputBufferClose(this._outputBufferPtr); + this._outputBufferPtr = 0; + const contentPtr = libxml2._xmlBufferContent(this._ptr); + this._content = libxml2.UTF8ToString(contentPtr); + return this._content; + } + + [Symbol.dispose]() { + if (this._outputBufferPtr !== 0) { + libxml2._xmlOutputBufferClose(this._outputBufferPtr); + this._outputBufferPtr = 0; + } + super[Symbol.dispose](); + } +} + export const xmlAddChild = libxml2._xmlAddChild; export const xmlAddNextSibling = libxml2._xmlAddNextSibling; export const xmlAddPrevSibling = libxml2._xmlAddPrevSibling; +export const xmlCopyNode = libxml2._xmlCopyNode; export const xmlCtxtSetErrorHandler = libxml2._xmlCtxtSetErrorHandler; export const xmlCtxtValidateDtd = libxml2._xmlCtxtValidateDtd; export const xmlDocGetRootElement = libxml2._xmlDocGetRootElement; export const xmlDocSetRootElement = libxml2._xmlDocSetRootElement; +export const xmlFree = libxml2._xmlFree; export const xmlFreeDoc = libxml2._xmlFreeDoc; export const xmlFreeNode = libxml2._xmlFreeNode; export const xmlFreeDtd = libxml2._xmlFreeDtd; @@ -669,3 +759,5 @@ export const xmlXPathFreeContext = libxml2._xmlXPathFreeContext; export const xmlXPathFreeObject = libxml2._xmlXPathFreeObject; export const xmlXPathNewContext = libxml2._xmlXPathNewContext; export const xmlXPathSetContextNode = libxml2._xmlXPathSetContextNode; +export const xmlC14NDocDumpMemory = libxml2._xmlC14NDocDumpMemory; +export const xmlC14NExecute = libxml2._xmlC14NExecute; diff --git a/src/libxml2raw.d.mts b/src/libxml2raw.d.mts index b41f2f0..0295aa2 100644 --- a/src/libxml2raw.d.mts +++ b/src/libxml2raw.d.mts @@ -39,6 +39,7 @@ export class LibXml2 { _xmlAddNextSibling(prev: XmlNodePtr, cur: XmlNodePtr): XmlNodePtr; _xmlAddPrevSibling(next: XmlNodePtr, cur: XmlNodePtr): XmlNodePtr; _xmlCleanupInputCallbacks(): void; + _xmlCopyNode(node: XmlNodePtr, extended: number): XmlNodePtr; _xmlCtxtParseDtd( ctxt: XmlParserCtxtPtr, input: XmlParserInputPtr, @@ -63,6 +64,7 @@ export class LibXml2 { _xmlFreeParserCtxt(ctxt: XmlParserCtxtPtr): void; _xmlDocGetRootElement(doc: XmlDocPtr): XmlNodePtr; _xmlDocSetRootElement(doc: XmlDocPtr, root: XmlNodePtr): XmlNodePtr; + _xmlFree(ptr: Pointer): void; _xmlFreeDoc(Doc: XmlDocPtr): void; _xmlFreeDtd(dtd: XmlDtdPtr): void; _xmlGetIntSubset(doc: XmlDocPtr): XmlDtdPtr; @@ -71,6 +73,7 @@ export class LibXml2 { _xmlHasNsProp(node: XmlNodePtr, name: CString, namespace: CString): XmlAttrPtr; _xmlInitParser(): void; _xmlNewDoc(): XmlDocPtr; + _xmlNewDtd(): XmlDtdPtr; _xmlNewCDataBlock(doc: XmlDocPtr, content: CString, len: number): XmlNodePtr; _xmlNewDocComment(doc: XmlDocPtr, content: CString): XmlNodePtr; _xmlNewDocNode(doc: XmlDocPtr, ns: XmlNsPtr, name: CString, content: CString): XmlNodePtr; @@ -160,10 +163,38 @@ export class LibXml2 { _xmlSchemaValidateDoc(ctx: XmlSchemaValidCtxtPtr, doc: XmlDocPtr): number; _xmlSchemaValidateOneElement(ctx: XmlSchemaValidCtxtPtr, elem: XmlNodePtr): number; _xmlUnlinkNode(cur: XmlNodePtr): void; + _xmlC14NDocDumpMemory( + doc: XmlDocPtr, + nodeset: Pointer, + mode: number, + inclusiveNamespaces: Pointer, + withComments: number, + docTxtPtr: Pointer, + ): number; + _xmlC14NExecute( + doc: XmlDocPtr, + is_visible_callback: Pointer, + user_data: Pointer, + mode: number, + inclusive_ns_prefixes: Pointer, + with_comments: number, + buf: Pointer, + ): number; + // _xmlBufferCreate + // _xmlOutputBufferCreateBuffer + // _xmlBufferContent + // _xmlOutputBufferClose + // _xmlBufferFree + _xmlBufferCreate(): Pointer; + _xmlOutputBufferCreateBuffer(buffer: Pointer, encoder: Pointer): Pointer; + _xmlBufferContent(buffer: Pointer): Pointer; + _xmlOutputBufferClose(outputBuffer: Pointer): number; + _xmlBufferFree(buffer: Pointer): void; // runtime functions UTF8ToString(ptr: CString, maxBytesToRead?: number): string; addFunction(func: Function, sig: string): Pointer; getValue(ptr: Pointer, type: string): number; + setValue(ptr: Pointer, value: number, type: string): void; lengthBytesUTF8(str: string): number; stringToUTF8(str: string, outPtr: CString, maxBytesToWrite: number): CString; } diff --git a/src/nodes.mts b/src/nodes.mts index edab129..140c564 100644 --- a/src/nodes.mts +++ b/src/nodes.mts @@ -88,7 +88,7 @@ export function forNodeType(nodeType: XmlNodeStruct.Type) { }; } -function createNode(nodePtr: XmlNodePtr): XmlNode { +export function createNode(nodePtr: XmlNodePtr): XmlNode { const nodeType = XmlNodeStruct.type(nodePtr); const Constructor = nodeConstructors.get(nodeType); diff --git a/src/utils.mts b/src/utils.mts index 601ad32..4073c19 100644 --- a/src/utils.mts +++ b/src/utils.mts @@ -1,36 +1,9 @@ -import { XmlInputProvider, XmlOutputBufferHandler } from './libxml2.mjs'; +import { + DisposableMalloc, + lengthBytesUTF8, setValue, stringToUTF8, XmlInputProvider, XmlOutputBufferHandler, +} from './libxml2.mjs'; import { Pointer } from './libxml2raw.mjs'; -/** - * Manage JS context object for wasm. - * - * In libxml2, a registration of callback often has a context/userdata pointer. - * But when it is in wasm, this pointer is essentially an integer. - * - * To support JS object as context/userdata, we store it in the map and access with an integer key. - * This key could be passed to the registration. - * And the callback use this key to retrieve the real object. - */ -export class ContextStorage { - private storage: Map = new Map(); - - private index = 0; - - allocate(value: T): number { - this.index += 1; - this.storage.set(this.index, value); - return this.index; - } - - free(index: number) { - this.storage.delete(index); - } - - get(index: number): T { - return this.storage.get(index)!; - } -} - const bufferContexts: Map = new Map(); let contextIndex = 1; @@ -142,3 +115,60 @@ export class XmlStringOutputBufferHandler implements XmlOutputBufferHandler { return this._result; } } + +/** + * Helper to create a C-style array of C strings + */ +export class CStringArrayWrapper extends DisposableMalloc { + private cStrings: DisposableMalloc[] = []; + + constructor(strings: string[]) { + // allocate pointer array (+1 for NULL terminator) + super((strings.length + 1) * 4); + + this.cStrings = strings.map((s) => { + const len = lengthBytesUTF8(s) + 1; + const mem = new DisposableMalloc(len); + stringToUTF8(s, mem._ptr, len); + return mem; + }); + + this.cStrings.forEach(({ _ptr }, i) => { + setValue(this._ptr + i * 4, _ptr, 'i32'); + }); + setValue(this._ptr + this.cStrings.length * 4, 0, 'i32'); + } + + [Symbol.dispose](): void { + this.cStrings.forEach((dm) => dm.dispose()); + super[Symbol.dispose](); + } +} + +/** + * Helper to create a libxml2 xmlNodeSet structure from an array of node pointers + */ +export class XmlNodeSetWrapper extends DisposableMalloc { + private nodeArrayMem: DisposableMalloc; + + constructor(nodes: number[]) { + super(12); // Allocate 12 bytes for the struct + const count = nodes.length; + + // allocate array of node pointers + this.nodeArrayMem = new DisposableMalloc(count * 4); + nodes.forEach((ptr, i) => { + setValue(this.nodeArrayMem._ptr + i * 4, ptr, 'i32'); + }); + + // allocate struct + setValue(this._ptr, count, 'i32'); // nodeNr + setValue(this._ptr + 4, count, 'i32'); // nodeMax + setValue(this._ptr + 8, this.nodeArrayMem._ptr, 'i32'); // nodeTab + } + + [Symbol.dispose](): void { + this.nodeArrayMem.dispose(); + super[Symbol.dispose](); + } +} diff --git a/test/crossplatform/c14n.spec.mts b/test/crossplatform/c14n.spec.mts new file mode 100644 index 0000000..beb2d0c --- /dev/null +++ b/test/crossplatform/c14n.spec.mts @@ -0,0 +1,167 @@ +import { assert, expect } from 'chai'; +import { + XmlDocument, diag, +} from '@libxml2-wasm/lib/index.mjs'; +import { + XmlC14NMode, +} from '@libxml2-wasm/lib/c14n.mjs'; + +const usingXmlDocument = (doc: XmlDocument, cb: (doc: XmlDocument) => void) => { + diag.configure({ enabled: true }); + try { + cb(doc); + } finally { + doc.dispose(); + const report = diag.report(); + diag.configure({ enabled: false }); + expect(report).to.deep.equal({}); + } +}; + +describe('C14N (XML Canonicalization)', () => { + describe('canonicalizeDocument', () => { + it('should canonicalize a simple XML document', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal(xmlString); + }); + }); + + it('should order attributes', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + expect(diag.report()).to.deep.equal({}); + }); + + it('should sort namespace declarations', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + expect(diag.report()).to.deep.equal({}); + }); + + it('should remove whitespace between attributes', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + expect(diag.report()).to.deep.equal({}); + }); + + it('should replace self-closing tags with full tags', () => { + const xmlString = ''; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal(''); + + doc.dispose(); + }); + expect(diag.report()).to.deep.equal({}); + }); + + it('should remove the XML declaration', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + expect(diag.report()).to.deep.equal({}); + }); + }); + + describe('canonicalizeNode', () => { + it('should canonicalize only a specific subtree', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); + + expect(node).to.not.be.null; + assert(node != null); + + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_1_0, node, + }); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + expect(diag.report()).to.deep.equal({}); + }); + + it('should include inclusive namespaces with exclusive canonicalization', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const inclusiveNamespaces = ['ns3']; + const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); + expect(node).to.not.be.null; + assert(node != null); + + const canonical = doc.toCanonicalString({ + node, + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + inclusiveNamespacePrefixList: inclusiveNamespaces, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + expect(diag.report()).to.deep.equal({}); + }); + }); + + describe('canonicalizeNodeSet', () => { + it('should work with nodeset', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const nodes = doc.find('//ns1:child/namespace::* | //ns:sibling/namespace::*', { ns: 'uri:root', ns1: 'uri:ns1' }); + + expect(nodes).to.have.lengthOf(4); + + const canonical = doc.toCanonicalString( + { mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, nodeSet: nodes }, + ); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('other'); + }); + expect(diag.report()).to.deep.equal({}); + }); + }); + + describe('canonicalizeCallback', () => { + it('should work with isVisibleCallback', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.toCanonicalString({ + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + isVisibleCallback: () => true, + }); + expect(canonical).to.equal('text'); + }); + }); + }); +}); From 0c7cd6aa8b9a8a8907f887eab692d752252bf679 Mon Sep 17 00:00:00 2001 From: shunkica Date: Wed, 12 Nov 2025 21:34:24 +0000 Subject: [PATCH 02/11] Refactor C14N API - Replace xmlC14NDocDumpMemory with xmlC14NExecute + xmlOutputBufferCreateIO - Remove document cloning workaround (xmlCopyNode, xmlNewDoc) by implementing proper node-level canonicalization - Add canonicalize() and canonicalizeToString() methods to both XmlDocument and XmlNode classes - Introduce XmlC14NIsVisibleCallback type for custom node filtering during canonicalization - Update C14NOptions interface with better type definitions and documentation - Remove obsolete utility classes (XmlNodeSetWrapper) and simplify memory management - Expand test coverage with comprehensive tests for all C14N modes, node-level canonicalization, comments handling, and inclusive namespaces --- binding/exported-functions.txt | 12 +- binding/exported-runtime-functions.txt | 3 +- src/c14n.mts | 546 ++++++++++++------------- src/document.mts | 42 +- src/index.mts | 8 + src/libxml2.mts | 70 ++-- src/libxml2raw.d.mts | 39 +- src/nodes.mts | 35 +- src/utils.mts | 28 -- test/crossplatform/c14n.spec.mts | 221 ++++++++-- 10 files changed, 554 insertions(+), 450 deletions(-) diff --git a/binding/exported-functions.txt b/binding/exported-functions.txt index 1aa720e..460231f 100644 --- a/binding/exported-functions.txt +++ b/binding/exported-functions.txt @@ -3,15 +3,14 @@ _malloc _xmlAddChild _xmlAddNextSibling _xmlAddPrevSibling +_xmlC14NExecute _xmlCleanupInputCallbacks -_xmlCopyNode _xmlCtxtParseDtd _xmlCtxtReadMemory _xmlCtxtSetErrorHandler _xmlCtxtValidateDtd _xmlDocGetRootElement _xmlDocSetRootElement -_xmlFree _xmlFreeDoc _xmlFreeDtd _xmlFreeNode @@ -33,6 +32,8 @@ _xmlNewParserCtxt _xmlNewReference _xmlNodeGetContent _xmlNodeSetContentLen +_xmlOutputBufferClose +_xmlOutputBufferCreateIO _xmlRegisterInputCallbacks _xmlRelaxNGFree _xmlRelaxNGFreeParserCtxt @@ -76,10 +77,3 @@ _xmlXPathFreeObject _xmlXPathNewContext _xmlXPathRegisterNs _xmlXPathSetContextNode -_xmlC14NDocDumpMemory -_xmlC14NExecute -_xmlBufferCreate -_xmlOutputBufferCreateBuffer -_xmlBufferContent -_xmlOutputBufferClose -_xmlBufferFree diff --git a/binding/exported-runtime-functions.txt b/binding/exported-runtime-functions.txt index 0d995f3..188cbca 100644 --- a/binding/exported-runtime-functions.txt +++ b/binding/exported-runtime-functions.txt @@ -3,6 +3,7 @@ HEAPU8 UTF8ToString addFunction getValue -setValue lengthBytesUTF8 +removeFunction +setValue stringToUTF8 diff --git a/src/c14n.mts b/src/c14n.mts index 757d046..a58d602 100644 --- a/src/c14n.mts +++ b/src/c14n.mts @@ -1,368 +1,338 @@ import { addFunction, - getValue, DisposableMalloc, - UTF8ToString, xmlC14NDocDumpMemory, xmlC14NExecute, xmlCopyNode, - xmlDocSetRootElement, + removeFunction, + xmlC14NExecute, + xmlOutputBufferCreateIO, + xmlOutputBufferClose, XmlError, - xmlFree, - xmlFreeDoc, - xmlNewDoc, - xmlNewNs, - XmlOutputBufferHandler, DisposableXmlOutputBuffer, ContextStorage, + XmlOutputBufferHandler, + XmlTreeCommonStruct, } from './libxml2.mjs'; -import { createNode, XmlElement, type XmlNode } from './nodes.mjs'; -import type { XmlDocPtr } from './libxml2raw.mjs'; -import { - CStringArrayWrapper, XmlNodeSetWrapper, -} from './utils.mjs'; -import { Pointer } from './libxml2raw.mjs'; +import type { XmlNode } from './nodes.mjs'; +import type { + XmlDocPtr, XmlOutputBufferPtr, Pointer, XmlNodePtr, +} from './libxml2raw.mjs'; +import { CStringArrayWrapper, XmlStringOutputBufferHandler } from './utils.mjs'; +import type { XmlDocument } from './document.mjs'; +/** + * C14N (Canonical XML) modes supported by libxml2 + * @see http://www.w3.org/TR/xml-c14n + * @see http://www.w3.org/TR/xml-exc-c14n + */ export const XmlC14NMode = { + /** Original C14N 1.0 specification */ XML_C14N_1_0: 0, + /** Exclusive C14N 1.0 (omits unused namespace declarations) */ XML_C14N_EXCLUSIVE_1_0: 1, + /** C14N 1.1 specification */ XML_C14N_1_1: 2, } as const; -export type C14NOptionsBase = { +/** + * Callback to determine if a node should be included in canonicalization. + * + * @param node The node being evaluated + * @param parent The parent of the node being evaluated + * @returns true if the node should be included, false otherwise + */ +export type XmlC14NIsVisibleCallback = (node: XmlNodePtr, parent: XmlNodePtr) => boolean; + +/** + * Options for XML canonicalization + */ +export interface C14NOptionsBase { /** The canonicalization mode to use - * @see {@link XmlC14NMode} + * @default XmlC14NMode.XML_C14N_1_0 */ - mode: typeof XmlC14NMode[keyof typeof XmlC14NMode]; - /** The list of inclusive namespace prefixes (only for exclusive canonicalization) */ - inclusiveNamespacePrefixList?: string[]; + mode?: typeof XmlC14NMode[keyof typeof XmlC14NMode]; + /** Whether to include comments in the canonicalized output * @default false */ withComments?: boolean; -}; -export type C14NOptionsDocument = C14NOptionsBase & { - node?: never; - nodeSet?: never; - isVisibleCallback?: never; - userData?: never; -}; + /** List of inclusive namespace prefixes for exclusive canonicalization + * Only applies when mode is XML_C14N_EXCLUSIVE_1_0 + */ + inclusiveNamespacePrefixes?: string[]; +} -export type C14NOptionsNode = C14NOptionsBase & { - node: XmlNode; +export interface C14NOptionsWithCallback extends C14NOptionsBase { + /** Custom callback to determine node visibility + * Cannot be used together with nodeSet + */ + isVisible: XmlC14NIsVisibleCallback; nodeSet?: never; - isVisibleCallback?: never; - userData?: never; -}; - -export type C14NOptionsNodeSet = C14NOptionsBase & { - nodeSet: XmlNode[]; - node?: never; - isVisibleCallback?: never; - userData?: never; -}; +} -export type C14NOptionsCallback = C14NOptionsBase & { - node?: never; - nodeSet?: never; - isVisibleCallback: XmlC14NIsVisibleCallback; - userData?: T; -}; +export interface C14NOptionsWithNodeSet extends C14NOptionsBase { + /** Set of nodes to include in canonicalization + * Cannot be used together with isVisible + */ + nodeSet: Set; + isVisible?: never; +} -export type C14NOptions = - C14NOptionsDocument | C14NOptionsNode | C14NOptionsNodeSet | C14NOptionsCallback; +export type C14NOptions = C14NOptionsWithCallback | C14NOptionsWithNodeSet | C14NOptionsBase; /** - * Decide if a node should be included in the canonicalization. + * Check if a node is within a subtree rooted at a specific node by walking + * up the parent chain using the libxml-provided parent pointer. + * + * Important: Namespace declaration nodes (xmlNs) are not part of the tree and + * don't have a normal parent field. libxml2 calls the visibility callback with + * the owning element as `parentPtr`, so we must start walking from `parentPtr` + * rather than dereferencing the node. + * @internal */ -export type XmlC14NIsVisibleCallback = (userData: T, node: XmlNode, parent: XmlNode) => boolean; +function isNodeInSubtree(nodePtr: number, parentPtr: number, rootPtr: number): boolean { + if (nodePtr === rootPtr) { + return true; + } + let currentPtr = parentPtr; + while (currentPtr !== 0) { + if (currentPtr === rootPtr) { + return true; + } + currentPtr = XmlTreeCommonStruct.parent(currentPtr); + } + return false; +} /** - * wrap the users is visible function + * Wrap a JavaScript isVisible callback as a C function pointer. + * Signature: int(void* user_data, xmlNodePtr node, xmlNodePtr parent) + * @internal */ -export function getC14NIsVisibleCallback( - cb: XmlC14NIsVisibleCallback, - contextStorage: ContextStorage | null, +function wrapIsVisibleCallback( + jsCallback: XmlC14NIsVisibleCallback, + cascade: boolean = true, ): Pointer { - const wrapper = (userDataPtr: number, nodePtr: number, parentPtr: number): number => { - const node = createNode(nodePtr); - const parent = createNode(parentPtr); - const userDataObj = contextStorage ? contextStorage.get(userDataPtr) : undefined; - return cb(userDataObj as T, node, parent) ? 1 : 0; + // Track nodes made invisible to cascade invisibility to descendants when requested + const invisible = cascade ? new Set() : null; + const wrapper = ( + _userDataPtr: number, + nodePtr: number, + parentPtr: number, + ): number => { + if (cascade && invisible) { + if (parentPtr !== 0 && invisible.has(parentPtr)) { + invisible.add(nodePtr); + return 0; + } + } + const res = jsCallback(nodePtr, parentPtr) ? 1 : 0; + if (cascade && invisible && res === 0) invisible.add(nodePtr); + return res; }; - const funcPtr = addFunction(wrapper, 'iiii'); - return funcPtr as Pointer; + return addFunction(wrapper, 'iiii') as Pointer; } /** - * Canonicalize an XML document with a specific node + * Convert a Set to an isVisible callback + * @internal */ -export function canonicalizeWithNode( - docPtr: XmlDocPtr, - handler: XmlOutputBufferHandler, - options: C14NOptionsNode, -): void { - using docTxtMem = new DisposableMalloc(4); - let tempDoc: number | null = null; - let prefixArray: CStringArrayWrapper | null = null; - - try { - // If inclusiveNamespaces is provided - if (options.inclusiveNamespacePrefixList) { - prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixList); - } - - // Create a temporary document for the subtree - tempDoc = xmlNewDoc(); - if (!tempDoc) { - throw new XmlError('Failed to create new document for subtree'); - } - - // Make a deep copy of the node (1 = recursive copy) - const copiedNode = xmlCopyNode(options.node._nodePtr, 1); - if (!copiedNode) { - throw new XmlError('Failed to copy subtree node'); +function createNodeSetCallback(nodeSet: Set): Pointer { + const rootPtrs = new Set(Array.from(nodeSet).map((n) => n._nodePtr)); + const wrapper = (_userDataPtr_: number, nodePtr: number, parentPtr: number): number => { + // Visible if node itself is a selected root, or it lies within any selected root subtree + if (rootPtrs.has(nodePtr)) return 1; + let cur = parentPtr; + while (cur !== 0) { + if (rootPtrs.has(cur)) return 1; + cur = XmlTreeCommonStruct.parent(cur); } - - // Set the copied node as the root element of the new document - xmlDocSetRootElement(tempDoc, copiedNode); - - // If inclusiveNamespaces is provided, - // we need to add the namespace declarations to the root element - const inclusivePrefixes = options.inclusiveNamespacePrefixList; - if (inclusivePrefixes) { - let currentNode: XmlElement | null = options.node.parent; - while (currentNode) { - Object.entries(currentNode.nsDeclarations).forEach( - ([prefix, namespaceURI]) => { - if (inclusivePrefixes.includes(prefix)) { - const namespace = xmlNewNs(copiedNode, namespaceURI, prefix); - if (!namespace) { - throw new XmlError(`Failed to add namespace declaration "${prefix}"`); - } - } - }, - ); - currentNode = currentNode.parent; - } - } - - const mode = options.mode ?? XmlC14NMode.XML_C14N_1_0; - const withComments = options.withComments ? 1 : 0; - - const result = xmlC14NDocDumpMemory( - tempDoc, - 0, // no nodeSet for single node - mode, - prefixArray ? prefixArray._ptr : 0, - withComments, - docTxtMem._ptr, - ); - - if (result < 0) { - throw new XmlError('Failed to canonicalize XML subtree'); - } - - const txtPtr = getValue(docTxtMem._ptr, 'i32'); - if (!txtPtr) throw new XmlError('Failed to get canonicalized XML'); - - const canonicalXml = UTF8ToString(txtPtr, result); - const buffer = new TextEncoder().encode(canonicalXml); - handler.write(buffer); - - xmlFree(txtPtr); - } finally { - if (tempDoc) { - xmlFreeDoc(tempDoc); - } - if (prefixArray) { - prefixArray.dispose(); - } - } + return 0; + }; + return addFunction(wrapper, 'iiii') as Pointer; } /** - * Canonicalize an XML document with a node set - * - * TODO: I can't figure out how to add namespace nodes to the node set. - * (Error: Unsupported node type 18) + * Internal implementation using xmlC14NExecute + * @internal */ -export function canonicalizeWithNodeSet( - docPtr: XmlDocPtr, +function canonicalizeInternal( handler: XmlOutputBufferHandler, - options: C14NOptionsNodeSet, + docPtr: XmlDocPtr, + options: C14NOptions = {}, + wrapCascade: boolean = true, ): void { - using docTxtPtr = new DisposableMalloc(4); - let prefixArray: CStringArrayWrapper | null = null; - let nodeSet: XmlNodeSetWrapper | null = null; - - try { - // If inclusiveNamespaces is provided - if (options.inclusiveNamespacePrefixList) { - prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixList); - } - - // Create nodeSet wrapper - nodeSet = new XmlNodeSetWrapper(options.nodeSet.map((item) => item._nodePtr)); - - const mode = options.mode ?? XmlC14NMode.XML_C14N_1_0; - const withComments = options.withComments ? 1 : 0; - - const result = xmlC14NDocDumpMemory( - docPtr, - nodeSet._ptr, - mode, - prefixArray ? prefixArray._ptr : 0, - withComments, - docTxtPtr._ptr, - ); - - if (result < 0) { - throw new XmlError('Failed to canonicalize XML with node set'); - } - - const txtPtr = getValue(docTxtPtr._ptr, 'i32'); - if (!txtPtr) throw new XmlError('Failed to get canonicalized XML'); + const hasIsVisible = (opts: C14NOptions): + opts is C14NOptions & { isVisible: XmlC14NIsVisibleCallback } => typeof (opts as any).isVisible === 'function'; - const canonicalXml = UTF8ToString(txtPtr, result); - const buffer = new TextEncoder().encode(canonicalXml); - handler.write(buffer); + const hasNodeSet = (opts: C14NOptions): + opts is C14NOptions & { nodeSet: Set } => (opts as any).nodeSet instanceof Set; - xmlFree(txtPtr); - } finally { - if (prefixArray) { - prefixArray.dispose(); - } - if (nodeSet) { - nodeSet.dispose(); - } + // Validate mutually exclusive options + if (hasIsVisible(options) && hasNodeSet(options)) { + throw new XmlError('Cannot specify both isVisible and nodeSet'); } -} -/** - * Canonicalize an XML document with a callback - */ -export function canonicalizeWithCallback( - docPtr: XmlDocPtr, - handler: XmlOutputBufferHandler, - options: C14NOptionsCallback, -): void { - using outputBuffer = new DisposableXmlOutputBuffer(); + let outputBufferPtr: XmlOutputBufferPtr | null = null; let prefixArray: CStringArrayWrapper | null = null; - let contextStorage: ContextStorage | null = null; - let callbackPtr: Pointer | null = null; - let userDataPtr = 0; + let callbackPtr: Pointer = 0 as Pointer; try { - // If inclusiveNamespaces is provided - if (options.inclusiveNamespacePrefixList) { - prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixList); + // Create output buffer using IO callbacks + outputBufferPtr = xmlOutputBufferCreateIO(handler); + + // Convert options to callback + if (hasIsVisible(options)) { + callbackPtr = wrapIsVisibleCallback(options.isVisible, wrapCascade); + } else if (hasNodeSet(options)) { + callbackPtr = createNodeSetCallback(options.nodeSet); } - // Set up callback and user data - if (options.userData !== undefined) { - contextStorage = new ContextStorage(); - userDataPtr = contextStorage.allocate(options.userData); + // Handle inclusive namespace prefixes + if (options.inclusiveNamespacePrefixes) { + prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixes); } - callbackPtr = getC14NIsVisibleCallback(options.isVisibleCallback, contextStorage); - + const mode = options.mode ?? XmlC14NMode.XML_C14N_1_0; const withComments = options.withComments ? 1 : 0; const result = xmlC14NExecute( docPtr, callbackPtr, - userDataPtr, - options.mode, + 0, // user_data (not used in our callbacks) + mode, prefixArray ? prefixArray._ptr : 0, withComments, - outputBuffer.getOutputBufferPtr(), + outputBufferPtr, ); if (result < 0) { - throw new XmlError('Failed to canonicalize XML with callback'); + throw new XmlError('Failed to canonicalize XML document'); } - - const caninicalizedXml = outputBuffer.getContent(); - - // TODO: handle this better - handler.write(Buffer.from(caninicalizedXml)); } finally { if (prefixArray) { prefixArray.dispose(); } - if (contextStorage) { - contextStorage.free(userDataPtr); + if (outputBufferPtr) { + xmlOutputBufferClose(outputBufferPtr); + } + if (callbackPtr !== 0) { + removeFunction(callbackPtr); } } } /** - * Canonicalize an XML document (default mode - entire document) + * Canonicalize an entire XML document to a buffer and invoke callbacks to process. + * + + * @param handler Callback to receive the canonicalized output + * @param doc The XML document to canonicalize + * @param options Canonicalization options + * + * @example + * ```typescript + * const handler = new XmlStringOutputBufferHandler(); + * canonicalizeDocument(handler, doc, { + * mode: XmlC14NMode.XML_C14N_1_0, + * withComments: false + * }); + * ``` */ export function canonicalizeDocument( - docPtr: XmlDocPtr, handler: XmlOutputBufferHandler, - options?: C14NOptionsBase, + doc: XmlDocument, + options: C14NOptions = {}, ): void { - using docTxtPtr = new DisposableMalloc(4); - let prefixArray: CStringArrayWrapper | null = null; - - try { - // If inclusiveNamespaces is provided - if (options && options.inclusiveNamespacePrefixList) { - prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixList); - } - - const mode = options && options.mode ? options.mode : XmlC14NMode.XML_C14N_1_0; - const withComments = options && options.withComments ? 1 : 0; - - const result = xmlC14NDocDumpMemory( - docPtr, - 0, // no nodeSet - mode, - prefixArray ? prefixArray._ptr : 0, - withComments, - docTxtPtr._ptr, - ); - - if (result < 0) { - throw new XmlError('Failed to canonicalize XML'); - } - - const txtPtr = getValue(docTxtPtr._ptr, 'i32'); - if (!txtPtr) throw new XmlError('Failed to get canonicalized XML'); + canonicalizeInternal(handler, doc._ptr, options); +} - const canonicalXml = UTF8ToString(txtPtr, result); - const buffer = new TextEncoder().encode(canonicalXml); - handler.write(buffer); +/** + * Canonicalize an entire XML document and return as a string. + * + * @param doc The XML document to canonicalize + * @param options Canonicalization options + * @returns The canonical XML string + * + * @example + * ```typescript + * const canonical = canonicalizeDocumentToString(doc, { + * mode: XmlC14NMode.XML_C14N_1_0, + * withComments: false + * }); + * ``` + */ +export function canonicalizeDocumentToString( + doc: XmlDocument, + options: C14NOptions = {}, +): string { + const handler = new XmlStringOutputBufferHandler(); + canonicalizeDocument(handler, doc, options); + return handler.result; +} - xmlFree(txtPtr); - } finally { - if (prefixArray) { - prefixArray.dispose(); - } - } +/** + * Canonicalize a subtree of an XML document to a buffer and invoke callbacks to process. + * + * This is a convenience helper that creates an isVisible callback to filter + * only nodes within the specified subtree. + * + * @param handler Callback to receive the canonicalized output + * @param doc The document containing the subtree + * @param subtreeRoot The root node of the subtree to canonicalize + * @param options Canonicalization options (cannot include isVisible or nodeSet) + * + * @example + * ```typescript + * const element = doc.get('//my-element'); + * const handler = new XmlStringOutputBufferHandler(); + * canonicalizeSubtree(handler, doc, element!, { + * mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + * inclusiveNamespacePrefixes: ['ns1', 'ns2'], + * withComments: false + * }); + * ``` + */ +export function canonicalizeSubtree( + handler: XmlOutputBufferHandler, + doc: XmlDocument, + subtreeRoot: XmlNode, + options: C14NOptionsBase = {}, +): void { + const subtreeRootPtr = subtreeRoot._nodePtr; + const isVisible = (nodePtr: number, parentPtr: number) => ( + isNodeInSubtree(nodePtr, parentPtr, subtreeRootPtr) + ); + // Use non-cascading behavior for subtree helper + canonicalizeInternal(handler, doc._ptr, { + ...options, + isVisible: isVisible as unknown as XmlC14NIsVisibleCallback, + }, /* wrapCascade */ false); } -// export function onlyATest(): string { -// const xmlString = 'text'; -// const doc = XmlDocument.fromString(xmlString); -// -// const buf = xmlBufferCreate(); -// const bufbuf = xmlOutputBufferCreateBuffer(buf, 0); -// -// const canonical = xmlC14NExecute( -// doc._ptr, -// 0, -// 0, -// 0, -// 0, -// 0, -// bufbuf, -// ); -// const errPtr = xmlGetLastError(); -// if (errPtr) { -// const code = getValue(errPtr + 16, 'i32'); // offset depends on struct layout -// const msgPtr = getValue(errPtr + 8, '*'); // check xmlError struct in libxml2 -// const msg = UTF8ToString(msgPtr); -// console.error('C14N error:', code, msg); -// } -// -// return canonical.toString(); -// } +/** + * Canonicalize a subtree of an XML document and return as a string. + * + * This is a convenience helper that creates an isVisible callback to filter + * only nodes within the specified subtree. + * + * @param doc The document containing the subtree + * @param subtreeRoot The root node of the subtree to canonicalize + * @param options Canonicalization options (cannot include isVisible or nodeSet) + * @returns The canonical XML string for the subtree + * + * @example + * ```typescript + * const element = doc.get('//my-element'); + * const canonical = canonicalizeSubtreeToString(doc, element!, { + * mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + * inclusiveNamespacePrefixes: ['ns1', 'ns2'], + * withComments: false + * }); + * ``` + */ +export function canonicalizeSubtreeToString( + doc: XmlDocument, + subtreeRoot: XmlNode, + options: C14NOptionsBase = {}, +): string { + const handler = new XmlStringOutputBufferHandler(); + canonicalizeSubtree(handler, doc, subtreeRoot, options); + return handler.result; +} diff --git a/src/document.mts b/src/document.mts index 1412ecc..1f7e504 100644 --- a/src/document.mts +++ b/src/document.mts @@ -32,13 +32,7 @@ import type { XmlDocPtr, XmlParserCtxtPtr } from './libxml2raw.mjs'; import { disposeBy, XmlDisposable } from './disposable.mjs'; import { XmlDtd } from './dtd.mjs'; import { XmlStringOutputBufferHandler } from './utils.mjs'; -import { - canonicalizeDocument, - canonicalizeWithCallback, - canonicalizeWithNode, - canonicalizeWithNodeSet, - type C14NOptions, -} from './c14n.mjs'; +import { type C14NOptions, canonicalizeDocument, canonicalizeDocumentToString } from './c14n.mjs'; export enum ParseOption { XML_PARSE_DEFAULT = 0, @@ -503,34 +497,26 @@ export class XmlDocument extends XmlDisposable { } /** - * Canonicalize the XML document to a buffer and invoke the callbacks to process. + * Canonicalize the document and invoke the handler to process. + * * @param handler handlers to process the content in the buffer - * @param options Canonicalization options - * @see {@link toCanonicalString} + * @param options options to adjust the canonicalization behavior + * @see {@link canonicalizeDocument} + * @see {@link canonicalizeToString} */ canonicalize(handler: XmlOutputBufferHandler, options?: C14NOptions): void { - if (!options) { - canonicalizeDocument(this._ptr, handler); - } else if (options.node) { - canonicalizeWithNode(this._ptr, handler, options); - } else if (options.nodeSet) { - canonicalizeWithNodeSet(this._ptr, handler, options); - } else if (options.isVisibleCallback) { - canonicalizeWithCallback(this._ptr, handler, options); - } else { - canonicalizeDocument(this._ptr, handler, options); - } + canonicalizeDocument(handler, this, options); } /** - * Canonicalize the XML document and return the result as a string. - * @param options Canonicalization options - * @returns The canonicalized XML as a string + * Canonicalize the document to a string. + * + * @param options options to adjust the canonicalization behavior + * @returns The canonicalized XML string + * @see {@link canonicalizeDocumentToString} * @see {@link canonicalize} */ - toCanonicalString(options?: C14NOptions): string { - const handler = new XmlStringOutputBufferHandler(); - this.canonicalize(handler, options); - return handler.result; + canonicalizeToString(options?: C14NOptions): string { + return canonicalizeDocumentToString(this, options); } } diff --git a/src/index.mts b/src/index.mts index cf302ea..176fd8c 100644 --- a/src/index.mts +++ b/src/index.mts @@ -52,4 +52,12 @@ export { readBuffer, closeBuffer, XmlBufferInputProvider, + XmlStringOutputBufferHandler, } from './utils.mjs'; +export { + XmlC14NMode, + canonicalizeSubtree, + canonicalizeSubtreeToString, + type C14NOptions, + type XmlC14NIsVisibleCallback, +} from './c14n.mjs'; diff --git a/src/libxml2.mts b/src/libxml2.mts index 669816e..38de1ea 100644 --- a/src/libxml2.mts +++ b/src/libxml2.mts @@ -7,6 +7,7 @@ import type { XmlErrorPtr, XmlNodePtr, XmlNsPtr, + XmlOutputBufferPtr, XmlParserCtxtPtr, XmlSaveCtxtPtr, XmlXPathCompExprPtr, @@ -15,12 +16,20 @@ import type { import moduleLoader from './libxml2raw.mjs'; import { disposeBy, XmlDisposable } from './disposable.mjs'; +// Re-export types for use in other modules +export type { + Pointer, + XmlDocPtr, + XmlNodePtr, + XmlOutputBufferPtr, +} from './libxml2raw.mjs'; + const libxml2 = await moduleLoader(); libxml2._xmlInitParser(); // Export specific functions needed by other modules export const { - getValue, setValue, UTF8ToString, lengthBytesUTF8, stringToUTF8, addFunction, + getValue, setValue, UTF8ToString, lengthBytesUTF8, stringToUTF8, addFunction, removeFunction, } = libxml2; /** @@ -663,58 +672,13 @@ export class DisposableMalloc extends XmlDisposable { } } -/** - * Maybe also don't expose xmlBuffer* functions directly? - * Don't reuse this buffer. - */ -@disposeBy(libxml2._xmlBufferFree) -export class DisposableXmlOutputBuffer extends XmlDisposable { - private _content: string | null = null; - - private _outputBufferPtr: number; - - constructor() { - super(libxml2._xmlBufferCreate()); - this._outputBufferPtr = libxml2._xmlOutputBufferCreateBuffer(this._ptr, 0); - } - - getOutputBufferPtr(): Pointer { - return this._outputBufferPtr; - } - - // closes the buffer and gets is content as string. - getContent(): string { - if (this._content) { - return this._content; - } - if (this._outputBufferPtr === 0) { - throw new XmlError('Output buffer has been closed'); - } - libxml2._xmlOutputBufferClose(this._outputBufferPtr); - this._outputBufferPtr = 0; - const contentPtr = libxml2._xmlBufferContent(this._ptr); - this._content = libxml2.UTF8ToString(contentPtr); - return this._content; - } - - [Symbol.dispose]() { - if (this._outputBufferPtr !== 0) { - libxml2._xmlOutputBufferClose(this._outputBufferPtr); - this._outputBufferPtr = 0; - } - super[Symbol.dispose](); - } -} - export const xmlAddChild = libxml2._xmlAddChild; export const xmlAddNextSibling = libxml2._xmlAddNextSibling; export const xmlAddPrevSibling = libxml2._xmlAddPrevSibling; -export const xmlCopyNode = libxml2._xmlCopyNode; export const xmlCtxtSetErrorHandler = libxml2._xmlCtxtSetErrorHandler; export const xmlCtxtValidateDtd = libxml2._xmlCtxtValidateDtd; export const xmlDocGetRootElement = libxml2._xmlDocGetRootElement; export const xmlDocSetRootElement = libxml2._xmlDocSetRootElement; -export const xmlFree = libxml2._xmlFree; export const xmlFreeDoc = libxml2._xmlFreeDoc; export const xmlFreeNode = libxml2._xmlFreeNode; export const xmlFreeDtd = libxml2._xmlFreeDtd; @@ -759,5 +723,17 @@ export const xmlXPathFreeContext = libxml2._xmlXPathFreeContext; export const xmlXPathFreeObject = libxml2._xmlXPathFreeObject; export const xmlXPathNewContext = libxml2._xmlXPathNewContext; export const xmlXPathSetContextNode = libxml2._xmlXPathSetContextNode; -export const xmlC14NDocDumpMemory = libxml2._xmlC14NDocDumpMemory; + +/** + * Create an output buffer using I/O callbacks (same pattern as xmlSaveToIO) + * @internal + */ +export function xmlOutputBufferCreateIO( + handler: XmlOutputBufferHandler, +): XmlOutputBufferPtr { + const index = outputHandlerStorage.allocate(handler); // will be freed in outputClose + return libxml2._xmlOutputBufferCreateIO(outputWrite, outputClose, index, 0); +} + +export const xmlOutputBufferClose = libxml2._xmlOutputBufferClose; export const xmlC14NExecute = libxml2._xmlC14NExecute; diff --git a/src/libxml2raw.d.mts b/src/libxml2raw.d.mts index 0295aa2..cc9a63c 100644 --- a/src/libxml2raw.d.mts +++ b/src/libxml2raw.d.mts @@ -26,6 +26,7 @@ type XmlXIncludeCtxtPtr = Pointer; type XmlXPathCompExprPtr = Pointer; type XmlXPathContextPtr = Pointer; type XmlXPathObjectPtr = Pointer; +type XmlOutputBufferPtr = Pointer; export class LibXml2 { HEAP32: Int32Array; @@ -180,23 +181,39 @@ export class LibXml2 { with_comments: number, buf: Pointer, ): number; - // _xmlBufferCreate - // _xmlOutputBufferCreateBuffer - // _xmlBufferContent - // _xmlOutputBufferClose - // _xmlBufferFree - _xmlBufferCreate(): Pointer; - _xmlOutputBufferCreateBuffer(buffer: Pointer, encoder: Pointer): Pointer; - _xmlBufferContent(buffer: Pointer): Pointer; - _xmlOutputBufferClose(outputBuffer: Pointer): number; - _xmlBufferFree(buffer: Pointer): void; + _xmlOutputBufferCreateIO( + iowrite: Pointer, + ioclose: Pointer, + ioctx: Pointer, + encoder: Pointer, + ): XmlOutputBufferPtr; + _xmlOutputBufferClose(out: XmlOutputBufferPtr): number; // runtime functions UTF8ToString(ptr: CString, maxBytesToRead?: number): string; addFunction(func: Function, sig: string): Pointer; getValue(ptr: Pointer, type: string): number; - setValue(ptr: Pointer, value: number, type: string): void; lengthBytesUTF8(str: string): number; + removeFunction(ptr: Pointer): void; + setValue(ptr: Pointer, value: number, type: string): void; stringToUTF8(str: string, outPtr: CString, maxBytesToWrite: number): CString; } +export type { + Pointer, + CString, + XmlAttrPtr, + XmlDocPtr, + XmlDtdPtr, + XmlErrorPtr, + XmlNodePtr, + XmlNsPtr, + XmlOutputBufferPtr, + XmlParserCtxtPtr, + XmlParserInputPtr, + XmlSaveCtxtPtr, + XmlXPathCompExprPtr, + XmlXPathContextPtr, + XmlXPathObjectPtr, +}; + export default function moduleLoader(): Promise; diff --git a/src/nodes.mts b/src/nodes.mts index 140c564..9268426 100644 --- a/src/nodes.mts +++ b/src/nodes.mts @@ -42,6 +42,9 @@ import { import type { XmlDocPtr, XmlNodePtr, XmlNsPtr } from './libxml2raw.mjs'; import { XmlStringOutputBufferHandler } from './utils.mjs'; import { NamespaceMap, XmlXPath } from './xpath.mjs'; +import { + canonicalizeSubtree, canonicalizeSubtreeToString, C14NOptionsBase, +} from './c14n.mjs'; function compiledXPathEval(nodePtr: XmlNodePtr, xpath: XmlXPath) { const context = xmlXPathNewContext(XmlNodeStruct.doc(nodePtr)); @@ -69,7 +72,7 @@ function xpathEval(nodePtr: XmlNodePtr, xpath: string | XmlXPath, namespaces?: N } interface XmlNodeConstructor { - new (ptr: XmlNodePtr): T; + new(ptr: XmlNodePtr): T; } const nodeConstructors: Map< @@ -187,6 +190,30 @@ export abstract class XmlNode { return XmlNodeStruct.line(this._nodePtr); } + /** + * Canonicalize this node and its subtree to a buffer and invoke the handler to process. + * + * @param handler handlers to process the content in the buffer + * @param options options to adjust the canonicalization behavior + * @see {@link canonicalizeSubtree} + * @see {@link canonicalizeToString} + */ + canonicalize(handler: XmlOutputBufferHandler, options?: C14NOptionsBase): void { + canonicalizeSubtree(handler, this.doc, this, options); + } + + /** + * Canonicalize this node and its subtree and return the result as a string. + * + * @param options options to adjust the canonicalization behavior + * @returns The canonicalized XML string. + * @see {@link canonicalizeSubtreeToString} + * @see {@link canonicalize} + */ + canonicalizeToString(options?: C14NOptionsBase): string { + return canonicalizeSubtreeToString(this.doc, this, options); + } + /** * Find the first descendant node matching the given compiled xpath selector * @@ -585,7 +612,8 @@ function namedNode( }); } -export interface XmlElement extends XmlNamedNode {} +export interface XmlElement extends XmlNamedNode { +} /** * The class representing an XML element node. @@ -805,7 +833,8 @@ export class XmlElement extends XmlTreeNode { } } -export interface XmlAttribute extends XmlNamedNode {} +export interface XmlAttribute extends XmlNamedNode { +} /** * The class representing an XML attribute node. diff --git a/src/utils.mts b/src/utils.mts index 4073c19..a0ae991 100644 --- a/src/utils.mts +++ b/src/utils.mts @@ -144,31 +144,3 @@ export class CStringArrayWrapper extends DisposableMalloc { super[Symbol.dispose](); } } - -/** - * Helper to create a libxml2 xmlNodeSet structure from an array of node pointers - */ -export class XmlNodeSetWrapper extends DisposableMalloc { - private nodeArrayMem: DisposableMalloc; - - constructor(nodes: number[]) { - super(12); // Allocate 12 bytes for the struct - const count = nodes.length; - - // allocate array of node pointers - this.nodeArrayMem = new DisposableMalloc(count * 4); - nodes.forEach((ptr, i) => { - setValue(this.nodeArrayMem._ptr + i * 4, ptr, 'i32'); - }); - - // allocate struct - setValue(this._ptr, count, 'i32'); // nodeNr - setValue(this._ptr + 4, count, 'i32'); // nodeMax - setValue(this._ptr + 8, this.nodeArrayMem._ptr, 'i32'); // nodeTab - } - - [Symbol.dispose](): void { - this.nodeArrayMem.dispose(); - super[Symbol.dispose](); - } -} diff --git a/test/crossplatform/c14n.spec.mts b/test/crossplatform/c14n.spec.mts index beb2d0c..270e1a8 100644 --- a/test/crossplatform/c14n.spec.mts +++ b/test/crossplatform/c14n.spec.mts @@ -1,10 +1,8 @@ import { assert, expect } from 'chai'; import { - XmlDocument, diag, + XmlDocument, diag, XmlC14NMode, canonicalizeSubtreeToString, XmlStringOutputBufferHandler, } from '@libxml2-wasm/lib/index.mjs'; -import { - XmlC14NMode, -} from '@libxml2-wasm/lib/c14n.mjs'; +import { XmlTreeCommonStruct } from '@libxml2-wasm/lib/libxml2.mjs'; const usingXmlDocument = (doc: XmlDocument, cb: (doc: XmlDocument) => void) => { diag.configure({ enabled: true }); @@ -23,7 +21,7 @@ describe('C14N (XML Canonicalization)', () => { it('should canonicalize a simple XML document', () => { const xmlString = 'text'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { - const canonical = doc.toCanonicalString({ + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_1_0, }); expect(canonical).to.be.a('string'); @@ -34,67 +32,60 @@ describe('C14N (XML Canonicalization)', () => { it('should order attributes', () => { const xmlString = 'text'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { - const canonical = doc.toCanonicalString({ + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_1_0, }); expect(canonical).to.be.a('string'); expect(canonical).to.equal('text'); }); - expect(diag.report()).to.deep.equal({}); }); it('should sort namespace declarations', () => { const xmlString = 'text'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { - const canonical = doc.toCanonicalString({ + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_1_0, }); expect(canonical).to.be.a('string'); expect(canonical).to.equal('text'); }); - expect(diag.report()).to.deep.equal({}); }); it('should remove whitespace between attributes', () => { const xmlString = 'text'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { - const canonical = doc.toCanonicalString({ + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_1_0, }); expect(canonical).to.be.a('string'); expect(canonical).to.equal('text'); }); - expect(diag.report()).to.deep.equal({}); }); it('should replace self-closing tags with full tags', () => { const xmlString = ''; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { - const canonical = doc.toCanonicalString({ + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_1_0, }); expect(canonical).to.be.a('string'); expect(canonical).to.equal(''); - - doc.dispose(); }); - expect(diag.report()).to.deep.equal({}); }); it('should remove the XML declaration', () => { const xmlString = 'text'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { - const canonical = doc.toCanonicalString({ + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_1_0, }); expect(canonical).to.be.a('string'); expect(canonical).to.equal('text'); }); - expect(diag.report()).to.deep.equal({}); }); }); - describe('canonicalizeNode', () => { + describe('canonicalizeSubtree', () => { it('should canonicalize only a specific subtree', () => { const xmlString = 'textother'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { @@ -103,14 +94,13 @@ describe('C14N (XML Canonicalization)', () => { expect(node).to.not.be.null; assert(node != null); - const canonical = doc.toCanonicalString({ - mode: XmlC14NMode.XML_C14N_1_0, node, + const canonical = canonicalizeSubtreeToString(doc, node, { + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, }); expect(canonical).to.be.a('string'); - expect(canonical).to.equal('text'); + expect(canonical).to.equal('text'); }); - expect(diag.report()).to.deep.equal({}); }); it('should include inclusive namespaces with exclusive canonicalization', () => { @@ -121,15 +111,13 @@ describe('C14N (XML Canonicalization)', () => { expect(node).to.not.be.null; assert(node != null); - const canonical = doc.toCanonicalString({ - node, + const canonical = canonicalizeSubtreeToString(doc, node, { mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, - inclusiveNamespacePrefixList: inclusiveNamespaces, + inclusiveNamespacePrefixes: inclusiveNamespaces, }); expect(canonical).to.be.a('string'); expect(canonical).to.equal('text'); }); - expect(diag.report()).to.deep.equal({}); }); }); @@ -137,31 +125,194 @@ describe('C14N (XML Canonicalization)', () => { it('should work with nodeset', () => { const xmlString = 'textother'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { - const nodes = doc.find('//ns1:child/namespace::* | //ns:sibling/namespace::*', { ns: 'uri:root', ns1: 'uri:ns1' }); + const nodes = doc.find('//ns1:child | //ns:sibling', { ns: 'uri:root', ns1: 'uri:ns1' }); - expect(nodes).to.have.lengthOf(4); + expect(nodes).to.have.lengthOf(2); - const canonical = doc.toCanonicalString( - { mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, nodeSet: nodes }, + const nodeSet = new Set(nodes); + const canonical = doc.canonicalizeToString( + { mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, nodeSet }, ); expect(canonical).to.be.a('string'); - expect(canonical).to.equal('other'); + expect(canonical).to.equal('textother'); }); - expect(diag.report()).to.deep.equal({}); }); }); describe('canonicalizeCallback', () => { - it('should work with isVisibleCallback', () => { + it('should work with isVisible callback', () => { const xmlString = 'text'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { - const canonical = doc.toCanonicalString({ + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, - isVisibleCallback: () => true, + isVisible: () => true, }); expect(canonical).to.equal('text'); }); }); + + it('should filter nodes with custom isVisible callback', () => { + const xmlString = 'preservedfiltered'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_1_0, + isVisible: (nodePtr: number) => { + // Filter out elements named 'remove' + const type = XmlTreeCommonStruct.type(nodePtr); + // Only check name for element nodes (type 1) + if (type === 1) { + const name = XmlTreeCommonStruct.name_(nodePtr); + return name !== 'remove'; + } + return true; // Include all other node types + }, + }); + expect(canonical).to.equal('preserved'); + }); + }); + + it('should throw error when both isVisible and nodeSet are provided', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const nodes = doc.find('//child'); + const nodeSet = new Set(nodes); + expect(() => doc.canonicalizeToString({ + isVisible: () => true, + nodeSet, + } as any)).to.throw('Cannot specify both isVisible and nodeSet'); + }); + }); + }); + + describe('canonicalization with comments', () => { + it('should exclude comments by default', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_1_0 }); + expect(canonical).to.equal('text'); + }); + }); + + it('should include comments when withComments is true', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_1_0, + withComments: true, + }); + expect(canonical).to.equal('text'); + }); + }); + }); + + describe('all C14N modes', () => { + const xmlString = 'text'; + + it('should canonicalize with XML_C14N_1_0', () => { + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_1_0 }); + expect(canonical).to.be.a('string'); + expect(canonical).to.include('xmlns'); + }); + }); + + it('should canonicalize with XML_C14N_EXCLUSIVE_1_0', () => { + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.include('xmlns'); + }); + }); + + it('should canonicalize with XML_C14N_1_1', () => { + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_1_1 }); + expect(canonical).to.be.a('string'); + expect(canonical).to.include('xmlns'); + }); + }); + }); + + describe('node.toCanonicalString() / node.canonicalize()', () => { + it('should canonicalize a node subtree', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); + expect(node).to.not.be.null; + assert(node != null); + + const canonical = node.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + }); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + + it('should work with default options', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//child'); + expect(node).to.not.be.null; + assert(node != null); + + const canonical = node.canonicalizeToString(); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + + it('should support inclusive namespaces', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); + expect(node).to.not.be.null; + assert(node != null); + + const canonical = node.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + inclusiveNamespacePrefixes: ['ns3'], + }); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + + it('should include comments when requested', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//child'); + expect(node).to.not.be.null; + assert(node != null); + + const canonical = node.canonicalizeToString({ withComments: true }); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + + it('should support canonicalize(handler, options) API', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); + expect(node).to.not.be.null; + assert(node != null); + + const handler = new XmlStringOutputBufferHandler(); + node.canonicalize(handler, { + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + }); + + expect(handler.result).to.be.a('string'); + expect(handler.result).to.equal('text'); + }); + }); }); }); From 7d1b61e7bbbc5d50851afc34dcb6f8e67f222cc1 Mon Sep 17 00:00:00 2001 From: shunkica Date: Thu, 13 Nov 2025 12:13:24 +0100 Subject: [PATCH 03/11] revert un-needed changes --- src/libxml2.mts | 8 -------- src/libxml2raw.d.mts | 18 ------------------ src/nodes.mts | 10 ++++------ 3 files changed, 4 insertions(+), 32 deletions(-) diff --git a/src/libxml2.mts b/src/libxml2.mts index 38de1ea..b0b0af1 100644 --- a/src/libxml2.mts +++ b/src/libxml2.mts @@ -16,14 +16,6 @@ import type { import moduleLoader from './libxml2raw.mjs'; import { disposeBy, XmlDisposable } from './disposable.mjs'; -// Re-export types for use in other modules -export type { - Pointer, - XmlDocPtr, - XmlNodePtr, - XmlOutputBufferPtr, -} from './libxml2raw.mjs'; - const libxml2 = await moduleLoader(); libxml2._xmlInitParser(); diff --git a/src/libxml2raw.d.mts b/src/libxml2raw.d.mts index cc9a63c..612787a 100644 --- a/src/libxml2raw.d.mts +++ b/src/libxml2raw.d.mts @@ -198,22 +198,4 @@ export class LibXml2 { stringToUTF8(str: string, outPtr: CString, maxBytesToWrite: number): CString; } -export type { - Pointer, - CString, - XmlAttrPtr, - XmlDocPtr, - XmlDtdPtr, - XmlErrorPtr, - XmlNodePtr, - XmlNsPtr, - XmlOutputBufferPtr, - XmlParserCtxtPtr, - XmlParserInputPtr, - XmlSaveCtxtPtr, - XmlXPathCompExprPtr, - XmlXPathContextPtr, - XmlXPathObjectPtr, -}; - export default function moduleLoader(): Promise; diff --git a/src/nodes.mts b/src/nodes.mts index 9268426..91ed002 100644 --- a/src/nodes.mts +++ b/src/nodes.mts @@ -72,7 +72,7 @@ function xpathEval(nodePtr: XmlNodePtr, xpath: string | XmlXPath, namespaces?: N } interface XmlNodeConstructor { - new(ptr: XmlNodePtr): T; + new (ptr: XmlNodePtr): T; } const nodeConstructors: Map< @@ -91,7 +91,7 @@ export function forNodeType(nodeType: XmlNodeStruct.Type) { }; } -export function createNode(nodePtr: XmlNodePtr): XmlNode { +function createNode(nodePtr: XmlNodePtr): XmlNode { const nodeType = XmlNodeStruct.type(nodePtr); const Constructor = nodeConstructors.get(nodeType); @@ -612,8 +612,7 @@ function namedNode( }); } -export interface XmlElement extends XmlNamedNode { -} +export interface XmlElement extends XmlNamedNode {} /** * The class representing an XML element node. @@ -833,8 +832,7 @@ export class XmlElement extends XmlTreeNode { } } -export interface XmlAttribute extends XmlNamedNode { -} +export interface XmlAttribute extends XmlNamedNode {} /** * The class representing an XML attribute node. From a3cfcfbc065b912f100e09d64892958a836dbb51 Mon Sep 17 00:00:00 2001 From: shunkica Date: Thu, 13 Nov 2025 12:35:35 +0100 Subject: [PATCH 04/11] export full c14n api and types --- src/index.mts | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/index.mts b/src/index.mts index 176fd8c..7aa3935 100644 --- a/src/index.mts +++ b/src/index.mts @@ -56,8 +56,13 @@ export { } from './utils.mjs'; export { XmlC14NMode, + canonicalizeDocument, + canonicalizeDocumentToString, canonicalizeSubtree, canonicalizeSubtreeToString, type C14NOptions, + type C14NOptionsBase, + type C14NOptionsWithCallback, + type C14NOptionsWithNodeSet, type XmlC14NIsVisibleCallback, } from './c14n.mjs'; From b217bcef356c30e877e4f7c70d1e945135b667d9 Mon Sep 17 00:00:00 2001 From: shunkica Date: Tue, 25 Nov 2025 00:58:28 +0100 Subject: [PATCH 05/11] refactor: refine canonicalization API, expose types, integrate document/node methods, adjust memory handling and callbacks --- src/c14n.mts | 195 +++++++++++++------------------ src/document.mts | 7 +- src/index.mts | 4 - src/libxml2.mts | 68 ++++++----- src/nodes.mts | 7 +- src/utils.mts | 62 +++++----- test/crossplatform/c14n.spec.mts | 17 ++- 7 files changed, 169 insertions(+), 191 deletions(-) diff --git a/src/c14n.mts b/src/c14n.mts index a58d602..449360f 100644 --- a/src/c14n.mts +++ b/src/c14n.mts @@ -1,6 +1,6 @@ import { addFunction, - removeFunction, + CStringArrayWrapper, xmlC14NExecute, xmlOutputBufferCreateIO, xmlOutputBufferClose, @@ -12,8 +12,68 @@ import type { XmlNode } from './nodes.mjs'; import type { XmlDocPtr, XmlOutputBufferPtr, Pointer, XmlNodePtr, } from './libxml2raw.mjs'; -import { CStringArrayWrapper, XmlStringOutputBufferHandler } from './utils.mjs'; import type { XmlDocument } from './document.mjs'; +import { ContextStorage } from './utils.mjs'; + +/** + * Context for the C14N isVisible callback. + * @internal + */ +interface C14NCallbackContext { + /** The JS callback to invoke, or null if using nodeSet mode */ + jsCallback: XmlC14NIsVisibleCallback | null; + /** For nodeSet mode: set of root pointers to check against */ + rootPtrs: Set | null; + /** Whether to cascade invisibility to descendants */ + cascade: boolean; + /** Tracks nodes made invisible (for cascade mode) */ + invisible: Set | null; +} + +const c14nCallbackStorage = new ContextStorage(); + +/** + * Global C14N visibility callback - created once at module initialization. + * Signature: int(void* user_data, xmlNodePtr node, xmlNodePtr parent) + * @internal + */ +const c14nIsVisibleCallback = addFunction( + (userDataIndex: number, nodePtr: number, parentPtr: number): number => { + const ctx = c14nCallbackStorage.get(userDataIndex); + + // Handle nodeSet mode + if (ctx.rootPtrs !== null) { + // Visible if node is a selected root, or lies within any selected root subtree + if (ctx.rootPtrs.has(nodePtr)) return 1; + let cur = parentPtr; + while (cur !== 0) { + if (ctx.rootPtrs.has(cur)) return 1; + cur = XmlTreeCommonStruct.parent(cur); + } + return 0; + } + + // Handle isVisible callback mode + if (ctx.jsCallback !== null) { + // Cascade invisibility check + if (ctx.cascade && ctx.invisible) { + if (parentPtr !== 0 && ctx.invisible.has(parentPtr)) { + ctx.invisible.add(nodePtr); + return 0; + } + } + const res = ctx.jsCallback(nodePtr, parentPtr) ? 1 : 0; + if (ctx.cascade && ctx.invisible && res === 0) { + ctx.invisible.add(nodePtr); + } + return res; + } + + // No callback or nodeSet - include all nodes + return 1; + }, + 'iiii', +) as Pointer; /** * C14N (Canonical XML) modes supported by libxml2 @@ -100,54 +160,6 @@ function isNodeInSubtree(nodePtr: number, parentPtr: number, rootPtr: number): b return false; } -/** - * Wrap a JavaScript isVisible callback as a C function pointer. - * Signature: int(void* user_data, xmlNodePtr node, xmlNodePtr parent) - * @internal - */ -function wrapIsVisibleCallback( - jsCallback: XmlC14NIsVisibleCallback, - cascade: boolean = true, -): Pointer { - // Track nodes made invisible to cascade invisibility to descendants when requested - const invisible = cascade ? new Set() : null; - const wrapper = ( - _userDataPtr: number, - nodePtr: number, - parentPtr: number, - ): number => { - if (cascade && invisible) { - if (parentPtr !== 0 && invisible.has(parentPtr)) { - invisible.add(nodePtr); - return 0; - } - } - const res = jsCallback(nodePtr, parentPtr) ? 1 : 0; - if (cascade && invisible && res === 0) invisible.add(nodePtr); - return res; - }; - return addFunction(wrapper, 'iiii') as Pointer; -} - -/** - * Convert a Set to an isVisible callback - * @internal - */ -function createNodeSetCallback(nodeSet: Set): Pointer { - const rootPtrs = new Set(Array.from(nodeSet).map((n) => n._nodePtr)); - const wrapper = (_userDataPtr_: number, nodePtr: number, parentPtr: number): number => { - // Visible if node itself is a selected root, or it lies within any selected root subtree - if (rootPtrs.has(nodePtr)) return 1; - let cur = parentPtr; - while (cur !== 0) { - if (rootPtrs.has(cur)) return 1; - cur = XmlTreeCommonStruct.parent(cur); - } - return 0; - }; - return addFunction(wrapper, 'iiii') as Pointer; -} - /** * Internal implementation using xmlC14NExecute * @internal @@ -156,7 +168,7 @@ function canonicalizeInternal( handler: XmlOutputBufferHandler, docPtr: XmlDocPtr, options: C14NOptions = {}, - wrapCascade: boolean = true, + cascade: boolean = true, ): void { const hasIsVisible = (opts: C14NOptions): opts is C14NOptions & { isVisible: XmlC14NIsVisibleCallback } => typeof (opts as any).isVisible === 'function'; @@ -171,17 +183,23 @@ function canonicalizeInternal( let outputBufferPtr: XmlOutputBufferPtr | null = null; let prefixArray: CStringArrayWrapper | null = null; - let callbackPtr: Pointer = 0 as Pointer; + let contextIndex: number = 0; try { // Create output buffer using IO callbacks outputBufferPtr = xmlOutputBufferCreateIO(handler); - // Convert options to callback - if (hasIsVisible(options)) { - callbackPtr = wrapIsVisibleCallback(options.isVisible, wrapCascade); - } else if (hasNodeSet(options)) { - callbackPtr = createNodeSetCallback(options.nodeSet); + // Build callback context based on options + if (hasIsVisible(options) || hasNodeSet(options)) { + const context: C14NCallbackContext = { + jsCallback: hasIsVisible(options) ? options.isVisible : null, + rootPtrs: hasNodeSet(options) + ? new Set(Array.from(options.nodeSet).map((n) => n._nodePtr)) + : null, + cascade, + invisible: cascade ? new Set() : null, + }; + contextIndex = c14nCallbackStorage.allocate(context); } // Handle inclusive namespace prefixes @@ -194,8 +212,8 @@ function canonicalizeInternal( const result = xmlC14NExecute( docPtr, - callbackPtr, - 0, // user_data (not used in our callbacks) + contextIndex !== 0 ? c14nIsVisibleCallback : 0 as Pointer, + contextIndex, // user_data is the storage index mode, prefixArray ? prefixArray._ptr : 0, withComments, @@ -212,8 +230,8 @@ function canonicalizeInternal( if (outputBufferPtr) { xmlOutputBufferClose(outputBufferPtr); } - if (callbackPtr !== 0) { - removeFunction(callbackPtr); + if (contextIndex !== 0) { + c14nCallbackStorage.free(contextIndex); } } } @@ -243,30 +261,6 @@ export function canonicalizeDocument( canonicalizeInternal(handler, doc._ptr, options); } -/** - * Canonicalize an entire XML document and return as a string. - * - * @param doc The XML document to canonicalize - * @param options Canonicalization options - * @returns The canonical XML string - * - * @example - * ```typescript - * const canonical = canonicalizeDocumentToString(doc, { - * mode: XmlC14NMode.XML_C14N_1_0, - * withComments: false - * }); - * ``` - */ -export function canonicalizeDocumentToString( - doc: XmlDocument, - options: C14NOptions = {}, -): string { - const handler = new XmlStringOutputBufferHandler(); - canonicalizeDocument(handler, doc, options); - return handler.result; -} - /** * Canonicalize a subtree of an XML document to a buffer and invoke callbacks to process. * @@ -305,34 +299,3 @@ export function canonicalizeSubtree( isVisible: isVisible as unknown as XmlC14NIsVisibleCallback, }, /* wrapCascade */ false); } - -/** - * Canonicalize a subtree of an XML document and return as a string. - * - * This is a convenience helper that creates an isVisible callback to filter - * only nodes within the specified subtree. - * - * @param doc The document containing the subtree - * @param subtreeRoot The root node of the subtree to canonicalize - * @param options Canonicalization options (cannot include isVisible or nodeSet) - * @returns The canonical XML string for the subtree - * - * @example - * ```typescript - * const element = doc.get('//my-element'); - * const canonical = canonicalizeSubtreeToString(doc, element!, { - * mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, - * inclusiveNamespacePrefixes: ['ns1', 'ns2'], - * withComments: false - * }); - * ``` - */ -export function canonicalizeSubtreeToString( - doc: XmlDocument, - subtreeRoot: XmlNode, - options: C14NOptionsBase = {}, -): string { - const handler = new XmlStringOutputBufferHandler(); - canonicalizeSubtree(handler, doc, subtreeRoot, options); - return handler.result; -} diff --git a/src/document.mts b/src/document.mts index 1f7e504..db79a72 100644 --- a/src/document.mts +++ b/src/document.mts @@ -32,7 +32,7 @@ import type { XmlDocPtr, XmlParserCtxtPtr } from './libxml2raw.mjs'; import { disposeBy, XmlDisposable } from './disposable.mjs'; import { XmlDtd } from './dtd.mjs'; import { XmlStringOutputBufferHandler } from './utils.mjs'; -import { type C14NOptions, canonicalizeDocument, canonicalizeDocumentToString } from './c14n.mjs'; +import { type C14NOptions, canonicalizeDocument } from './c14n.mjs'; export enum ParseOption { XML_PARSE_DEFAULT = 0, @@ -513,10 +513,11 @@ export class XmlDocument extends XmlDisposable { * * @param options options to adjust the canonicalization behavior * @returns The canonicalized XML string - * @see {@link canonicalizeDocumentToString} * @see {@link canonicalize} */ canonicalizeToString(options?: C14NOptions): string { - return canonicalizeDocumentToString(this, options); + const handler = new XmlStringOutputBufferHandler(); + canonicalizeDocument(handler, this, options); + return handler.result; } } diff --git a/src/index.mts b/src/index.mts index 7aa3935..f4bb137 100644 --- a/src/index.mts +++ b/src/index.mts @@ -56,10 +56,6 @@ export { } from './utils.mjs'; export { XmlC14NMode, - canonicalizeDocument, - canonicalizeDocumentToString, - canonicalizeSubtree, - canonicalizeSubtreeToString, type C14NOptions, type C14NOptionsBase, type C14NOptionsWithCallback, diff --git a/src/libxml2.mts b/src/libxml2.mts index b0b0af1..5286ec1 100644 --- a/src/libxml2.mts +++ b/src/libxml2.mts @@ -14,6 +14,7 @@ import type { XmlXPathContextPtr, } from './libxml2raw.mjs'; import moduleLoader from './libxml2raw.mjs'; +import { ContextStorage } from './utils.mjs'; import { disposeBy, XmlDisposable } from './disposable.mjs'; const libxml2 = await moduleLoader(); @@ -21,39 +22,9 @@ libxml2._xmlInitParser(); // Export specific functions needed by other modules export const { - getValue, setValue, UTF8ToString, lengthBytesUTF8, stringToUTF8, addFunction, removeFunction, + getValue, UTF8ToString, lengthBytesUTF8, stringToUTF8, addFunction, } = libxml2; -/** - * Manage JS context object for wasm. - * - * In libxml2, a registration of callback often has a context/userdata pointer. - * But when it is in wasm, this pointer is essentially an integer. - * - * To support JS object as context/userdata, we store it in the map and access with an integer key. - * This key could be passed to the registration. - * And the callback use this key to retrieve the real object. - */ -export class ContextStorage { - private storage: Map = new Map(); - - private index = 0; - - allocate(value: T): number { - this.index += 1; - this.storage.set(this.index, value); - return this.index; - } - - free(index: number) { - this.storage.delete(index); - } - - get(index: number): T { - return this.storage.get(index)!; - } -} - /** * The base class for exceptions in this library. * @@ -664,6 +635,41 @@ export class DisposableMalloc extends XmlDisposable { } } +/** + * Helper to create a C-style NULL-terminated array of C strings. + * + * Allocates a single contiguous memory block containing: + * - First: the pointer array (n+1 pointers, last is NULL) + * - Then: the string data (all strings with null terminators) + * + * Memory layout: [ptr0][ptr1]...[ptrN][NULL][str0\0][str1\0]...[strN\0] + */ +export class CStringArrayWrapper extends DisposableMalloc { + constructor(strings: string[]) { + // Calculate total size needed + const pointerArraySize = (strings.length + 1) * 4; // +1 for NULL terminator + const stringSizes = strings.map((s) => libxml2.lengthBytesUTF8(s) + 1); + const totalStringSize = stringSizes.reduce((sum, size) => sum + size, 0); + const totalSize = pointerArraySize + totalStringSize; + + // Allocate single block + super(totalSize); + + // Write strings and set pointers + let stringOffset = this._ptr + pointerArraySize; + const ptrArrayBase = this._ptr / libxml2.HEAP32.BYTES_PER_ELEMENT; + strings.forEach((s, i) => { + // Set pointer to this string + libxml2.HEAP32[ptrArrayBase + i] = stringOffset; + // Write the string + libxml2.stringToUTF8(s, stringOffset, stringSizes[i]); + stringOffset += stringSizes[i]; + }); + // NULL terminate the pointer array + libxml2.HEAP32[ptrArrayBase + strings.length] = 0; + } +} + export const xmlAddChild = libxml2._xmlAddChild; export const xmlAddNextSibling = libxml2._xmlAddNextSibling; export const xmlAddPrevSibling = libxml2._xmlAddPrevSibling; diff --git a/src/nodes.mts b/src/nodes.mts index 91ed002..eabe705 100644 --- a/src/nodes.mts +++ b/src/nodes.mts @@ -43,7 +43,7 @@ import type { XmlDocPtr, XmlNodePtr, XmlNsPtr } from './libxml2raw.mjs'; import { XmlStringOutputBufferHandler } from './utils.mjs'; import { NamespaceMap, XmlXPath } from './xpath.mjs'; import { - canonicalizeSubtree, canonicalizeSubtreeToString, C14NOptionsBase, + canonicalizeSubtree, C14NOptionsBase, } from './c14n.mjs'; function compiledXPathEval(nodePtr: XmlNodePtr, xpath: XmlXPath) { @@ -207,11 +207,12 @@ export abstract class XmlNode { * * @param options options to adjust the canonicalization behavior * @returns The canonicalized XML string. - * @see {@link canonicalizeSubtreeToString} * @see {@link canonicalize} */ canonicalizeToString(options?: C14NOptionsBase): string { - return canonicalizeSubtreeToString(this.doc, this, options); + const handler = new XmlStringOutputBufferHandler(); + canonicalizeSubtree(handler, this.doc, this, options); + return handler.result; } /** diff --git a/src/utils.mts b/src/utils.mts index a0ae991..bd1cbfc 100644 --- a/src/utils.mts +++ b/src/utils.mts @@ -1,9 +1,38 @@ import { - DisposableMalloc, - lengthBytesUTF8, setValue, stringToUTF8, XmlInputProvider, XmlOutputBufferHandler, + XmlInputProvider, XmlOutputBufferHandler, } from './libxml2.mjs'; import { Pointer } from './libxml2raw.mjs'; +/** + * Manage JS context object for wasm. + * + * In libxml2, a registration of callback often has a context/userdata pointer. + * But when it is in wasm, this pointer is essentially an integer. + * + * To support JS object as context/userdata, we store it in the map and access with an integer key. + * This key could be passed to the registration. + * And the callback use this key to retrieve the real object. + */ +export class ContextStorage { + private storage: Map = new Map(); + + private index = 0; + + allocate(value: T): number { + this.index += 1; + this.storage.set(this.index, value); + return this.index; + } + + free(index: number) { + this.storage.delete(index); + } + + get(index: number): T { + return this.storage.get(index)!; + } +} + const bufferContexts: Map = new Map(); let contextIndex = 1; @@ -115,32 +144,3 @@ export class XmlStringOutputBufferHandler implements XmlOutputBufferHandler { return this._result; } } - -/** - * Helper to create a C-style array of C strings - */ -export class CStringArrayWrapper extends DisposableMalloc { - private cStrings: DisposableMalloc[] = []; - - constructor(strings: string[]) { - // allocate pointer array (+1 for NULL terminator) - super((strings.length + 1) * 4); - - this.cStrings = strings.map((s) => { - const len = lengthBytesUTF8(s) + 1; - const mem = new DisposableMalloc(len); - stringToUTF8(s, mem._ptr, len); - return mem; - }); - - this.cStrings.forEach(({ _ptr }, i) => { - setValue(this._ptr + i * 4, _ptr, 'i32'); - }); - setValue(this._ptr + this.cStrings.length * 4, 0, 'i32'); - } - - [Symbol.dispose](): void { - this.cStrings.forEach((dm) => dm.dispose()); - super[Symbol.dispose](); - } -} diff --git a/test/crossplatform/c14n.spec.mts b/test/crossplatform/c14n.spec.mts index 270e1a8..cf106a2 100644 --- a/test/crossplatform/c14n.spec.mts +++ b/test/crossplatform/c14n.spec.mts @@ -1,6 +1,6 @@ import { assert, expect } from 'chai'; import { - XmlDocument, diag, XmlC14NMode, canonicalizeSubtreeToString, XmlStringOutputBufferHandler, + XmlDocument, diag, XmlC14NMode, XmlStringOutputBufferHandler, } from '@libxml2-wasm/lib/index.mjs'; import { XmlTreeCommonStruct } from '@libxml2-wasm/lib/libxml2.mjs'; @@ -85,6 +85,17 @@ describe('C14N (XML Canonicalization)', () => { }); }); + describe('doc.canonicalize(handler, options)', () => { + it('should canonicalize document using handler API', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const handler = new XmlStringOutputBufferHandler(); + doc.canonicalize(handler); + expect(handler.result).to.equal(xmlString); + }); + }); + }); + describe('canonicalizeSubtree', () => { it('should canonicalize only a specific subtree', () => { const xmlString = 'textother'; @@ -94,7 +105,7 @@ describe('C14N (XML Canonicalization)', () => { expect(node).to.not.be.null; assert(node != null); - const canonical = canonicalizeSubtreeToString(doc, node, { + const canonical = node.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, }); @@ -111,7 +122,7 @@ describe('C14N (XML Canonicalization)', () => { expect(node).to.not.be.null; assert(node != null); - const canonical = canonicalizeSubtreeToString(doc, node, { + const canonical = node.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, inclusiveNamespacePrefixes: inclusiveNamespaces, }); From e00f917464a14edc6d3fcbea4b3326879aad67cc Mon Sep 17 00:00:00 2001 From: shunkica Date: Tue, 25 Nov 2025 11:33:21 +0100 Subject: [PATCH 06/11] replaced DisposableMalloc and CStringArrayWrapper with allocCStringArray(string[]) and free(Pointer) --- src/c14n.mts | 13 +++++----- src/libxml2.mts | 63 ++++++++++++++++++++++--------------------------- 2 files changed, 34 insertions(+), 42 deletions(-) diff --git a/src/c14n.mts b/src/c14n.mts index 449360f..5cf722d 100644 --- a/src/c14n.mts +++ b/src/c14n.mts @@ -1,6 +1,7 @@ import { addFunction, - CStringArrayWrapper, + allocCStringArray, + free, xmlC14NExecute, xmlOutputBufferCreateIO, xmlOutputBufferClose, @@ -182,7 +183,7 @@ function canonicalizeInternal( } let outputBufferPtr: XmlOutputBufferPtr | null = null; - let prefixArray: CStringArrayWrapper | null = null; + let prefixArrayPtr: Pointer = 0; let contextIndex: number = 0; try { @@ -204,7 +205,7 @@ function canonicalizeInternal( // Handle inclusive namespace prefixes if (options.inclusiveNamespacePrefixes) { - prefixArray = new CStringArrayWrapper(options.inclusiveNamespacePrefixes); + prefixArrayPtr = allocCStringArray(options.inclusiveNamespacePrefixes); } const mode = options.mode ?? XmlC14NMode.XML_C14N_1_0; @@ -215,7 +216,7 @@ function canonicalizeInternal( contextIndex !== 0 ? c14nIsVisibleCallback : 0 as Pointer, contextIndex, // user_data is the storage index mode, - prefixArray ? prefixArray._ptr : 0, + prefixArrayPtr, withComments, outputBufferPtr, ); @@ -224,9 +225,7 @@ function canonicalizeInternal( throw new XmlError('Failed to canonicalize XML document'); } } finally { - if (prefixArray) { - prefixArray.dispose(); - } + if (prefixArrayPtr) free(prefixArrayPtr); if (outputBufferPtr) { xmlOutputBufferClose(outputBufferPtr); } diff --git a/src/libxml2.mts b/src/libxml2.mts index 5286ec1..99b3f57 100644 --- a/src/libxml2.mts +++ b/src/libxml2.mts @@ -15,7 +15,6 @@ import type { } from './libxml2raw.mjs'; import moduleLoader from './libxml2raw.mjs'; import { ContextStorage } from './utils.mjs'; -import { disposeBy, XmlDisposable } from './disposable.mjs'; const libxml2 = await moduleLoader(); libxml2._xmlInitParser(); @@ -625,16 +624,6 @@ export function xmlSaveSetIndentString( return withStringUTF8(indent, (buf) => libxml2._xmlSaveSetIndentString(ctxt, buf)); } -/** - * We probably don't want to expose malloc/free directly? - */ -@disposeBy(libxml2._free) -export class DisposableMalloc extends XmlDisposable { - constructor(size: number) { - super(libxml2._malloc(size)); - } -} - /** * Helper to create a C-style NULL-terminated array of C strings. * @@ -643,33 +632,37 @@ export class DisposableMalloc extends XmlDisposable { * - Then: the string data (all strings with null terminators) * * Memory layout: [ptr0][ptr1]...[ptrN][NULL][str0\0][str1\0]...[strN\0] + * + * @returns The pointer to the allocated memory. Caller must free with {@link free}. */ -export class CStringArrayWrapper extends DisposableMalloc { - constructor(strings: string[]) { - // Calculate total size needed - const pointerArraySize = (strings.length + 1) * 4; // +1 for NULL terminator - const stringSizes = strings.map((s) => libxml2.lengthBytesUTF8(s) + 1); - const totalStringSize = stringSizes.reduce((sum, size) => sum + size, 0); - const totalSize = pointerArraySize + totalStringSize; - - // Allocate single block - super(totalSize); - - // Write strings and set pointers - let stringOffset = this._ptr + pointerArraySize; - const ptrArrayBase = this._ptr / libxml2.HEAP32.BYTES_PER_ELEMENT; - strings.forEach((s, i) => { - // Set pointer to this string - libxml2.HEAP32[ptrArrayBase + i] = stringOffset; - // Write the string - libxml2.stringToUTF8(s, stringOffset, stringSizes[i]); - stringOffset += stringSizes[i]; - }); - // NULL terminate the pointer array - libxml2.HEAP32[ptrArrayBase + strings.length] = 0; - } +export function allocCStringArray(strings: string[]): Pointer { + // Calculate total size needed + const pointerArraySize = (strings.length + 1) * 4; // +1 for NULL terminator + const stringSizes = strings.map((s) => libxml2.lengthBytesUTF8(s) + 1); + const totalStringSize = stringSizes.reduce((sum, size) => sum + size, 0); + const totalSize = pointerArraySize + totalStringSize; + + // Allocate single block + const ptr = libxml2._malloc(totalSize); + + // Write strings and set pointers + let stringOffset = ptr + pointerArraySize; + const ptrArrayBase = ptr / libxml2.HEAP32.BYTES_PER_ELEMENT; + strings.forEach((s, i) => { + // Set pointer to this string + libxml2.HEAP32[ptrArrayBase + i] = stringOffset; + // Write the string + libxml2.stringToUTF8(s, stringOffset, stringSizes[i]); + stringOffset += stringSizes[i]; + }); + // NULL terminate the pointer array + libxml2.HEAP32[ptrArrayBase + strings.length] = 0; + + return ptr; } +export const free = libxml2._free; + export const xmlAddChild = libxml2._xmlAddChild; export const xmlAddNextSibling = libxml2._xmlAddNextSibling; export const xmlAddPrevSibling = libxml2._xmlAddPrevSibling; From b48b429022f6f1268661ad495813a7e92e0c5d4f Mon Sep 17 00:00:00 2001 From: shunkica Date: Tue, 25 Nov 2025 11:49:30 +0100 Subject: [PATCH 07/11] refactor: remove unused functions --- binding/exported-runtime-functions.txt | 2 -- src/document.mts | 2 +- src/libxml2raw.d.mts | 13 ------------- src/nodes.mts | 2 +- 4 files changed, 2 insertions(+), 17 deletions(-) diff --git a/binding/exported-runtime-functions.txt b/binding/exported-runtime-functions.txt index 188cbca..7d758a2 100644 --- a/binding/exported-runtime-functions.txt +++ b/binding/exported-runtime-functions.txt @@ -4,6 +4,4 @@ UTF8ToString addFunction getValue lengthBytesUTF8 -removeFunction -setValue stringToUTF8 diff --git a/src/document.mts b/src/document.mts index db79a72..c529f23 100644 --- a/src/document.mts +++ b/src/document.mts @@ -517,7 +517,7 @@ export class XmlDocument extends XmlDisposable { */ canonicalizeToString(options?: C14NOptions): string { const handler = new XmlStringOutputBufferHandler(); - canonicalizeDocument(handler, this, options); + this.canonicalize(handler, options); return handler.result; } } diff --git a/src/libxml2raw.d.mts b/src/libxml2raw.d.mts index 612787a..e0e3c09 100644 --- a/src/libxml2raw.d.mts +++ b/src/libxml2raw.d.mts @@ -40,7 +40,6 @@ export class LibXml2 { _xmlAddNextSibling(prev: XmlNodePtr, cur: XmlNodePtr): XmlNodePtr; _xmlAddPrevSibling(next: XmlNodePtr, cur: XmlNodePtr): XmlNodePtr; _xmlCleanupInputCallbacks(): void; - _xmlCopyNode(node: XmlNodePtr, extended: number): XmlNodePtr; _xmlCtxtParseDtd( ctxt: XmlParserCtxtPtr, input: XmlParserInputPtr, @@ -65,7 +64,6 @@ export class LibXml2 { _xmlFreeParserCtxt(ctxt: XmlParserCtxtPtr): void; _xmlDocGetRootElement(doc: XmlDocPtr): XmlNodePtr; _xmlDocSetRootElement(doc: XmlDocPtr, root: XmlNodePtr): XmlNodePtr; - _xmlFree(ptr: Pointer): void; _xmlFreeDoc(Doc: XmlDocPtr): void; _xmlFreeDtd(dtd: XmlDtdPtr): void; _xmlGetIntSubset(doc: XmlDocPtr): XmlDtdPtr; @@ -74,7 +72,6 @@ export class LibXml2 { _xmlHasNsProp(node: XmlNodePtr, name: CString, namespace: CString): XmlAttrPtr; _xmlInitParser(): void; _xmlNewDoc(): XmlDocPtr; - _xmlNewDtd(): XmlDtdPtr; _xmlNewCDataBlock(doc: XmlDocPtr, content: CString, len: number): XmlNodePtr; _xmlNewDocComment(doc: XmlDocPtr, content: CString): XmlNodePtr; _xmlNewDocNode(doc: XmlDocPtr, ns: XmlNsPtr, name: CString, content: CString): XmlNodePtr; @@ -164,14 +161,6 @@ export class LibXml2 { _xmlSchemaValidateDoc(ctx: XmlSchemaValidCtxtPtr, doc: XmlDocPtr): number; _xmlSchemaValidateOneElement(ctx: XmlSchemaValidCtxtPtr, elem: XmlNodePtr): number; _xmlUnlinkNode(cur: XmlNodePtr): void; - _xmlC14NDocDumpMemory( - doc: XmlDocPtr, - nodeset: Pointer, - mode: number, - inclusiveNamespaces: Pointer, - withComments: number, - docTxtPtr: Pointer, - ): number; _xmlC14NExecute( doc: XmlDocPtr, is_visible_callback: Pointer, @@ -193,8 +182,6 @@ export class LibXml2 { addFunction(func: Function, sig: string): Pointer; getValue(ptr: Pointer, type: string): number; lengthBytesUTF8(str: string): number; - removeFunction(ptr: Pointer): void; - setValue(ptr: Pointer, value: number, type: string): void; stringToUTF8(str: string, outPtr: CString, maxBytesToWrite: number): CString; } diff --git a/src/nodes.mts b/src/nodes.mts index eabe705..f12b9d1 100644 --- a/src/nodes.mts +++ b/src/nodes.mts @@ -211,7 +211,7 @@ export abstract class XmlNode { */ canonicalizeToString(options?: C14NOptionsBase): string { const handler = new XmlStringOutputBufferHandler(); - canonicalizeSubtree(handler, this.doc, this, options); + this.canonicalize(handler, options); return handler.result; } From 2a641144a015e45439637f494405ad4c03bbb53c Mon Sep 17 00:00:00 2001 From: shunkica Date: Tue, 25 Nov 2025 18:59:37 +0100 Subject: [PATCH 08/11] refactor: c14n options --- src/c14n.mts | 30 +++++++++++++++--------------- src/index.mts | 4 +--- src/nodes.mts | 6 +++--- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/src/c14n.mts b/src/c14n.mts index 5cf722d..6bed4b8 100644 --- a/src/c14n.mts +++ b/src/c14n.mts @@ -102,7 +102,7 @@ export type XmlC14NIsVisibleCallback = (node: XmlNodePtr, parent: XmlNodePtr) => /** * Options for XML canonicalization */ -export interface C14NOptionsBase { +export interface C14NOptions { /** The canonicalization mode to use * @default XmlC14NMode.XML_C14N_1_0 */ @@ -117,25 +117,22 @@ export interface C14NOptionsBase { * Only applies when mode is XML_C14N_EXCLUSIVE_1_0 */ inclusiveNamespacePrefixes?: string[]; -} -export interface C14NOptionsWithCallback extends C14NOptionsBase { /** Custom callback to determine node visibility - * Cannot be used together with nodeSet + * Must not be used together with {@link nodeSet} */ - isVisible: XmlC14NIsVisibleCallback; - nodeSet?: never; -} + isVisible?: XmlC14NIsVisibleCallback; -export interface C14NOptionsWithNodeSet extends C14NOptionsBase { /** Set of nodes to include in canonicalization - * Cannot be used together with isVisible + * Must not be used together with {@link isVisible} */ - nodeSet: Set; - isVisible?: never; + nodeSet?: Set; } -export type C14NOptions = C14NOptionsWithCallback | C14NOptionsWithNodeSet | C14NOptionsBase; +/** + * C14N options without filtering callbacks (for subtree canonicalization) + */ +export type SubtreeC14NOptions = Omit; /** * Check if a node is within a subtree rooted at a specific node by walking @@ -172,7 +169,9 @@ function canonicalizeInternal( cascade: boolean = true, ): void { const hasIsVisible = (opts: C14NOptions): - opts is C14NOptions & { isVisible: XmlC14NIsVisibleCallback } => typeof (opts as any).isVisible === 'function'; + opts is C14NOptions & { + isVisible: XmlC14NIsVisibleCallback + } => typeof (opts as any).isVisible === 'function'; const hasNodeSet = (opts: C14NOptions): opts is C14NOptions & { nodeSet: Set } => (opts as any).nodeSet instanceof Set; @@ -195,7 +194,8 @@ function canonicalizeInternal( const context: C14NCallbackContext = { jsCallback: hasIsVisible(options) ? options.isVisible : null, rootPtrs: hasNodeSet(options) - ? new Set(Array.from(options.nodeSet).map((n) => n._nodePtr)) + ? new Set(Array.from(options.nodeSet) + .map((n) => n._nodePtr)) : null, cascade, invisible: cascade ? new Set() : null, @@ -286,7 +286,7 @@ export function canonicalizeSubtree( handler: XmlOutputBufferHandler, doc: XmlDocument, subtreeRoot: XmlNode, - options: C14NOptionsBase = {}, + options: SubtreeC14NOptions = {}, ): void { const subtreeRootPtr = subtreeRoot._nodePtr; const isVisible = (nodePtr: number, parentPtr: number) => ( diff --git a/src/index.mts b/src/index.mts index f4bb137..c216ba2 100644 --- a/src/index.mts +++ b/src/index.mts @@ -57,8 +57,6 @@ export { export { XmlC14NMode, type C14NOptions, - type C14NOptionsBase, - type C14NOptionsWithCallback, - type C14NOptionsWithNodeSet, + type SubtreeC14NOptions, type XmlC14NIsVisibleCallback, } from './c14n.mjs'; diff --git a/src/nodes.mts b/src/nodes.mts index f12b9d1..5bd5942 100644 --- a/src/nodes.mts +++ b/src/nodes.mts @@ -43,7 +43,7 @@ import type { XmlDocPtr, XmlNodePtr, XmlNsPtr } from './libxml2raw.mjs'; import { XmlStringOutputBufferHandler } from './utils.mjs'; import { NamespaceMap, XmlXPath } from './xpath.mjs'; import { - canonicalizeSubtree, C14NOptionsBase, + canonicalizeSubtree, SubtreeC14NOptions, } from './c14n.mjs'; function compiledXPathEval(nodePtr: XmlNodePtr, xpath: XmlXPath) { @@ -198,7 +198,7 @@ export abstract class XmlNode { * @see {@link canonicalizeSubtree} * @see {@link canonicalizeToString} */ - canonicalize(handler: XmlOutputBufferHandler, options?: C14NOptionsBase): void { + canonicalize(handler: XmlOutputBufferHandler, options?: SubtreeC14NOptions): void { canonicalizeSubtree(handler, this.doc, this, options); } @@ -209,7 +209,7 @@ export abstract class XmlNode { * @returns The canonicalized XML string. * @see {@link canonicalize} */ - canonicalizeToString(options?: C14NOptionsBase): string { + canonicalizeToString(options?: SubtreeC14NOptions): string { const handler = new XmlStringOutputBufferHandler(); this.canonicalize(handler, options); return handler.result; From 92a5f3501a8841954429880b373d3b6211a3bb3a Mon Sep 17 00:00:00 2001 From: shunkica Date: Tue, 25 Nov 2025 19:26:19 +0100 Subject: [PATCH 09/11] refactor: format, remove unnecessary exports, ignore unreachable code in coverage --- src/c14n.mts | 4 ++-- src/libxml2.mts | 9 +++++---- src/nodes.mts | 4 +--- src/utils.mts | 4 +--- 4 files changed, 9 insertions(+), 12 deletions(-) diff --git a/src/c14n.mts b/src/c14n.mts index 6bed4b8..6cadeaa 100644 --- a/src/c14n.mts +++ b/src/c14n.mts @@ -69,8 +69,7 @@ const c14nIsVisibleCallback = addFunction( } return res; } - - // No callback or nodeSet - include all nodes + /* c8 ignore next 2, callback is not registered if neither is present */ return 1; }, 'iiii', @@ -221,6 +220,7 @@ function canonicalizeInternal( outputBufferPtr, ); + /* c8 ignore next 3, defensive code */ if (result < 0) { throw new XmlError('Failed to canonicalize XML document'); } diff --git a/src/libxml2.mts b/src/libxml2.mts index 99b3f57..7b612b9 100644 --- a/src/libxml2.mts +++ b/src/libxml2.mts @@ -19,10 +19,11 @@ import { ContextStorage } from './utils.mjs'; const libxml2 = await moduleLoader(); libxml2._xmlInitParser(); -// Export specific functions needed by other modules -export const { - getValue, UTF8ToString, lengthBytesUTF8, stringToUTF8, addFunction, -} = libxml2; +/** + * Export runtime functions needed by other modules. + * @internal + */ +export const { addFunction } = libxml2; /** * The base class for exceptions in this library. diff --git a/src/nodes.mts b/src/nodes.mts index 5bd5942..b977fce 100644 --- a/src/nodes.mts +++ b/src/nodes.mts @@ -42,9 +42,7 @@ import { import type { XmlDocPtr, XmlNodePtr, XmlNsPtr } from './libxml2raw.mjs'; import { XmlStringOutputBufferHandler } from './utils.mjs'; import { NamespaceMap, XmlXPath } from './xpath.mjs'; -import { - canonicalizeSubtree, SubtreeC14NOptions, -} from './c14n.mjs'; +import { canonicalizeSubtree, SubtreeC14NOptions } from './c14n.mjs'; function compiledXPathEval(nodePtr: XmlNodePtr, xpath: XmlXPath) { const context = xmlXPathNewContext(XmlNodeStruct.doc(nodePtr)); diff --git a/src/utils.mts b/src/utils.mts index bd1cbfc..601ad32 100644 --- a/src/utils.mts +++ b/src/utils.mts @@ -1,6 +1,4 @@ -import { - XmlInputProvider, XmlOutputBufferHandler, -} from './libxml2.mjs'; +import { XmlInputProvider, XmlOutputBufferHandler } from './libxml2.mjs'; import { Pointer } from './libxml2raw.mjs'; /** From 357dcfeee644845cd164abbed8f08a95c52fb851 Mon Sep 17 00:00:00 2001 From: shunkica Date: Fri, 28 Nov 2025 09:18:27 +0100 Subject: [PATCH 10/11] tests: add tests for all supported node types --- test/crossplatform/c14n.spec.mts | 109 ++++++++++++++++++++++++++----- 1 file changed, 93 insertions(+), 16 deletions(-) diff --git a/test/crossplatform/c14n.spec.mts b/test/crossplatform/c14n.spec.mts index cf106a2..d4e4f8f 100644 --- a/test/crossplatform/c14n.spec.mts +++ b/test/crossplatform/c14n.spec.mts @@ -1,6 +1,15 @@ import { assert, expect } from 'chai'; import { - XmlDocument, diag, XmlC14NMode, XmlStringOutputBufferHandler, + diag, + XmlDocument, + XmlC14NMode, + XmlStringOutputBufferHandler, + XmlElement, + XmlText, + XmlComment, + XmlCData, + XmlAttribute, + XmlEntityReference, } from '@libxml2-wasm/lib/index.mjs'; import { XmlTreeCommonStruct } from '@libxml2-wasm/lib/libxml2.mjs'; @@ -101,9 +110,7 @@ describe('C14N (XML Canonicalization)', () => { const xmlString = 'textother'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); - - expect(node).to.not.be.null; - assert(node != null); + assert(node instanceof XmlElement); const canonical = node.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, @@ -119,8 +126,7 @@ describe('C14N (XML Canonicalization)', () => { usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { const inclusiveNamespaces = ['ns3']; const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); - expect(node).to.not.be.null; - assert(node != null); + assert(node instanceof XmlElement); const canonical = node.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, @@ -252,8 +258,7 @@ describe('C14N (XML Canonicalization)', () => { const xmlString = 'textother'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); - expect(node).to.not.be.null; - assert(node != null); + assert(node instanceof XmlElement); const canonical = node.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, @@ -268,8 +273,7 @@ describe('C14N (XML Canonicalization)', () => { const xmlString = 'text'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { const node = doc.get('//child'); - expect(node).to.not.be.null; - assert(node != null); + assert(node instanceof XmlElement); const canonical = node.canonicalizeToString(); @@ -282,8 +286,7 @@ describe('C14N (XML Canonicalization)', () => { const xmlString = 'text'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); - expect(node).to.not.be.null; - assert(node != null); + assert(node instanceof XmlElement); const canonical = node.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, @@ -299,8 +302,7 @@ describe('C14N (XML Canonicalization)', () => { const xmlString = 'text'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { const node = doc.get('//child'); - expect(node).to.not.be.null; - assert(node != null); + assert(node instanceof XmlElement); const canonical = node.canonicalizeToString({ withComments: true }); @@ -313,8 +315,7 @@ describe('C14N (XML Canonicalization)', () => { const xmlString = 'textother'; usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); - expect(node).to.not.be.null; - assert(node != null); + assert(node instanceof XmlElement); const handler = new XmlStringOutputBufferHandler(); node.canonicalize(handler, { @@ -326,4 +327,80 @@ describe('C14N (XML Canonicalization)', () => { }); }); }); + + describe('canonicalize on different node types', () => { + it('should canonicalize XmlText node', () => { + const xmlString = 'Left & Right'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const textNode = doc.get('//child/text()'); + assert(textNode instanceof XmlText); + + const canonical = textNode.canonicalizeToString(); + expect(canonical).to.be.a('string'); + // Text node canonicalization includes the text content + expect(canonical).to.equal('Left & Right'); + }); + }); + + it('should canonicalize XmlComment', () => { + const xmlString = 'text'; + + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const commentNode = doc.get('//child/comment()'); + assert(commentNode instanceof XmlComment); + + // 1. Default Canonicalization (Comments are excluded) + const canonical = commentNode.canonicalizeToString(); + expect(canonical).to.equal(''); + + // 2. Canonicalization with Comments + const canonicalWithComments = commentNode.canonicalizeToString({ + withComments: true, + }); + // libxml2 canonicalizes comments with a trailing newline + expect(canonicalWithComments).to.equal('\n'); + }); + }); + + it('should canonicalize XmlCData', () => { + const xmlString = 'text'; + + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const cdataNode = doc.get('//child/text()'); + assert(cdataNode instanceof XmlCData); + + const canonical = cdataNode.canonicalizeToString(); + expect(canonical).to.be.a('string'); + + // C14N strips the wrapper and treats it as raw text + expect(canonical).to.equal('cdata content'); + }); + }); + + it('should canonicalize XmlAttribute', () => { + const xmlString = 'text'; + + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const attr = doc.get('//child/@attr1'); + assert(attr instanceof XmlAttribute); + + const canonical = attr.canonicalizeToString(); + expect(canonical).to.be.a('string'); + + // libxml2 canonicalizes attributes with leading space + expect(canonical).to.equal(' attr1="value1"'); + }); + }); + + it('should throw error when canonicalizing XmlEntityReference', () => { + const xmlString = ' ]>&myent;'; + + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const entityRef = doc.root.firstChild; + assert(entityRef instanceof XmlEntityReference); + + expect(() => entityRef.canonicalizeToString()).to.throw('Failed to canonicalize XML document'); + }); + }); + }); }); From 8dfc5b55152485fbeb2521a3faa92bf44386e69b Mon Sep 17 00:00:00 2001 From: shunkica Date: Fri, 28 Nov 2025 11:05:41 +0100 Subject: [PATCH 11/11] add explanation for error --- test/crossplatform/c14n.spec.mts | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/test/crossplatform/c14n.spec.mts b/test/crossplatform/c14n.spec.mts index d4e4f8f..f87a94f 100644 --- a/test/crossplatform/c14n.spec.mts +++ b/test/crossplatform/c14n.spec.mts @@ -399,6 +399,10 @@ describe('C14N (XML Canonicalization)', () => { const entityRef = doc.root.firstChild; assert(entityRef instanceof XmlEntityReference); + /** + * C14N states that all entity references must be expanded. Therefore, the concept + * of canonicalizing an Entity Reference node as a distinct object is paradoxical + */ expect(() => entityRef.canonicalizeToString()).to.throw('Failed to canonicalize XML document'); }); });