diff --git a/binding/exported-functions.txt b/binding/exported-functions.txt index 900a6c5..460231f 100644 --- a/binding/exported-functions.txt +++ b/binding/exported-functions.txt @@ -3,6 +3,7 @@ _malloc _xmlAddChild _xmlAddNextSibling _xmlAddPrevSibling +_xmlC14NExecute _xmlCleanupInputCallbacks _xmlCtxtParseDtd _xmlCtxtReadMemory @@ -31,6 +32,8 @@ _xmlNewParserCtxt _xmlNewReference _xmlNodeGetContent _xmlNodeSetContentLen +_xmlOutputBufferClose +_xmlOutputBufferCreateIO _xmlRegisterInputCallbacks _xmlRelaxNGFree _xmlRelaxNGFreeParserCtxt diff --git a/src/c14n.mts b/src/c14n.mts new file mode 100644 index 0000000..6cadeaa --- /dev/null +++ b/src/c14n.mts @@ -0,0 +1,300 @@ +import { + addFunction, + allocCStringArray, + free, + xmlC14NExecute, + xmlOutputBufferCreateIO, + xmlOutputBufferClose, + XmlError, + XmlOutputBufferHandler, + XmlTreeCommonStruct, +} from './libxml2.mjs'; +import type { XmlNode } from './nodes.mjs'; +import type { + XmlDocPtr, XmlOutputBufferPtr, Pointer, XmlNodePtr, +} from './libxml2raw.mjs'; +import type { XmlDocument } from './document.mjs'; +import { ContextStorage } from './utils.mjs'; + +/** + * Context for the C14N isVisible callback. + * @internal + */ +interface C14NCallbackContext { + /** The JS callback to invoke, or null if using nodeSet mode */ + jsCallback: XmlC14NIsVisibleCallback | null; + /** For nodeSet mode: set of root pointers to check against */ + rootPtrs: Set | null; + /** Whether to cascade invisibility to descendants */ + cascade: boolean; + /** Tracks nodes made invisible (for cascade mode) */ + invisible: Set | null; +} + +const c14nCallbackStorage = new ContextStorage(); + +/** + * Global C14N visibility callback - created once at module initialization. 
+ * Signature: int(void* user_data, xmlNodePtr node, xmlNodePtr parent) + * @internal + */ +const c14nIsVisibleCallback = addFunction( + (userDataIndex: number, nodePtr: number, parentPtr: number): number => { + const ctx = c14nCallbackStorage.get(userDataIndex); + + // Handle nodeSet mode + if (ctx.rootPtrs !== null) { + // Visible if node is a selected root, or lies within any selected root subtree + if (ctx.rootPtrs.has(nodePtr)) return 1; + let cur = parentPtr; + while (cur !== 0) { + if (ctx.rootPtrs.has(cur)) return 1; + cur = XmlTreeCommonStruct.parent(cur); + } + return 0; + } + + // Handle isVisible callback mode + if (ctx.jsCallback !== null) { + // Cascade invisibility check + if (ctx.cascade && ctx.invisible) { + if (parentPtr !== 0 && ctx.invisible.has(parentPtr)) { + ctx.invisible.add(nodePtr); + return 0; + } + } + const res = ctx.jsCallback(nodePtr, parentPtr) ? 1 : 0; + if (ctx.cascade && ctx.invisible && res === 0) { + ctx.invisible.add(nodePtr); + } + return res; + } + /* c8 ignore next 2, callback is not registered if neither is present */ + return 1; + }, + 'iiii', +) as Pointer; + +/** + * C14N (Canonical XML) modes supported by libxml2 + * @see http://www.w3.org/TR/xml-c14n + * @see http://www.w3.org/TR/xml-exc-c14n + */ +export const XmlC14NMode = { + /** Original C14N 1.0 specification */ + XML_C14N_1_0: 0, + /** Exclusive C14N 1.0 (omits unused namespace declarations) */ + XML_C14N_EXCLUSIVE_1_0: 1, + /** C14N 1.1 specification */ + XML_C14N_1_1: 2, +} as const; + +/** + * Callback to determine if a node should be included in canonicalization. 
+ * + * @param node Pointer to the node being evaluated + * @param parent Pointer to the parent of the node being evaluated + * @returns true if the node should be included, false otherwise + */ +export type XmlC14NIsVisibleCallback = (node: XmlNodePtr, parent: XmlNodePtr) => boolean; + +/** + * Options for XML canonicalization + */ +export interface C14NOptions { + /** The canonicalization mode to use + * @default XmlC14NMode.XML_C14N_1_0 + */ + mode?: typeof XmlC14NMode[keyof typeof XmlC14NMode]; + + /** Whether to include comments in the canonicalized output + * @default false + */ + withComments?: boolean; + + /** List of inclusive namespace prefixes for exclusive canonicalization + * Only applies when mode is XML_C14N_EXCLUSIVE_1_0 + */ + inclusiveNamespacePrefixes?: string[]; + + /** Custom callback to determine node visibility + * Must not be used together with {@link nodeSet} + */ + isVisible?: XmlC14NIsVisibleCallback; + + /** Set of nodes to include in canonicalization, together with their descendants + * Must not be used together with {@link isVisible} + */ + nodeSet?: Set; +} + +/** + * C14N options without filtering callbacks (for subtree canonicalization) + */ +export type SubtreeC14NOptions = Omit; + +/** + * Check if a node is within a subtree rooted at a specific node by walking + * up the parent chain using the libxml-provided parent pointer. + * + * Important: Namespace declaration nodes (xmlNs) are not part of the tree and + * don't have a normal parent field. libxml2 calls the visibility callback with + * the owning element as `parentPtr`, so we must start walking from `parentPtr` + * rather than dereferencing the node. 
+ * @internal + */ +function isNodeInSubtree(nodePtr: number, parentPtr: number, rootPtr: number): boolean { + if (nodePtr === rootPtr) { + return true; + } + let currentPtr = parentPtr; + while (currentPtr !== 0) { + if (currentPtr === rootPtr) { + return true; + } + currentPtr = XmlTreeCommonStruct.parent(currentPtr); + } + return false; +} + +/** + * Internal implementation using xmlC14NExecute + * @internal + */ +function canonicalizeInternal( + handler: XmlOutputBufferHandler, + docPtr: XmlDocPtr, + options: C14NOptions = {}, + cascade: boolean = true, +): void { + const hasIsVisible = (opts: C14NOptions): + opts is C14NOptions & { + isVisible: XmlC14NIsVisibleCallback + } => typeof (opts as any).isVisible === 'function'; + + const hasNodeSet = (opts: C14NOptions): + opts is C14NOptions & { nodeSet: Set } => (opts as any).nodeSet instanceof Set; + + // Validate mutually exclusive options + if (hasIsVisible(options) && hasNodeSet(options)) { + throw new XmlError('Cannot specify both isVisible and nodeSet'); + } + + let outputBufferPtr: XmlOutputBufferPtr | null = null; + let prefixArrayPtr: Pointer = 0; + let contextIndex: number = 0; + + try { + // Create output buffer using IO callbacks + outputBufferPtr = xmlOutputBufferCreateIO(handler); + + // Build callback context based on options + if (hasIsVisible(options) || hasNodeSet(options)) { + const context: C14NCallbackContext = { + jsCallback: hasIsVisible(options) ? options.isVisible : null, + rootPtrs: hasNodeSet(options) + ? new Set(Array.from(options.nodeSet) + .map((n) => n._nodePtr)) + : null, + cascade, + invisible: cascade ? new Set() : null, + }; + contextIndex = c14nCallbackStorage.allocate(context); + } + + // Handle inclusive namespace prefixes + if (options.inclusiveNamespacePrefixes) { + prefixArrayPtr = allocCStringArray(options.inclusiveNamespacePrefixes); + } + + const mode = options.mode ?? XmlC14NMode.XML_C14N_1_0; + const withComments = options.withComments ? 
1 : 0; + + const result = xmlC14NExecute( + docPtr, + contextIndex !== 0 ? c14nIsVisibleCallback : 0 as Pointer, + contextIndex, // user_data is the storage index + mode, + prefixArrayPtr, + withComments, + outputBufferPtr, + ); + + /* c8 ignore next 3, defensive code */ + if (result < 0) { + throw new XmlError('Failed to canonicalize XML document'); + } + } finally { + if (prefixArrayPtr) free(prefixArrayPtr); + if (outputBufferPtr) { + xmlOutputBufferClose(outputBufferPtr); + } + if (contextIndex !== 0) { + c14nCallbackStorage.free(contextIndex); + } + } +} + +/** + * Canonicalize an entire XML document to a buffer and invoke callbacks to process. + * + + * @param handler Callback to receive the canonicalized output + * @param doc The XML document to canonicalize + * @param options Canonicalization options + * + * @example + * ```typescript + * const handler = new XmlStringOutputBufferHandler(); + * canonicalizeDocument(handler, doc, { + * mode: XmlC14NMode.XML_C14N_1_0, + * withComments: false + * }); + * ``` + */ +export function canonicalizeDocument( + handler: XmlOutputBufferHandler, + doc: XmlDocument, + options: C14NOptions = {}, +): void { + canonicalizeInternal(handler, doc._ptr, options); +} + +/** + * Canonicalize a subtree of an XML document to a buffer and invoke callbacks to process. + * + * This is a convenience helper that creates an isVisible callback to filter + * only nodes within the specified subtree. 
+ * + * @param handler Callback to receive the canonicalized output + * @param doc The document containing the subtree + * @param subtreeRoot The root node of the subtree to canonicalize + * @param options Canonicalization options (cannot include isVisible or nodeSet) + * + * @example + * ```typescript + * const element = doc.get('//my-element'); + * const handler = new XmlStringOutputBufferHandler(); + * canonicalizeSubtree(handler, doc, element!, { + * mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + * inclusiveNamespacePrefixes: ['ns1', 'ns2'], + * withComments: false + * }); + * ``` + */ +export function canonicalizeSubtree( + handler: XmlOutputBufferHandler, + doc: XmlDocument, + subtreeRoot: XmlNode, + options: SubtreeC14NOptions = {}, +): void { + const subtreeRootPtr = subtreeRoot._nodePtr; + const isVisible = (nodePtr: number, parentPtr: number) => ( + isNodeInSubtree(nodePtr, parentPtr, subtreeRootPtr) + ); + // Use non-cascading behavior for subtree helper + canonicalizeInternal(handler, doc._ptr, { + ...options, + isVisible: isVisible as unknown as XmlC14NIsVisibleCallback, + }, /* cascade */ false); +} diff --git a/src/document.mts b/src/document.mts index 0d481c4..c529f23 100644 --- a/src/document.mts +++ b/src/document.mts @@ -32,6 +32,7 @@ import type { XmlDocPtr, XmlParserCtxtPtr } from './libxml2raw.mjs'; import { disposeBy, XmlDisposable } from './disposable.mjs'; import { XmlDtd } from './dtd.mjs'; import { XmlStringOutputBufferHandler } from './utils.mjs'; +import { type C14NOptions, canonicalizeDocument } from './c14n.mjs'; export enum ParseOption { XML_PARSE_DEFAULT = 0, @@ -494,4 +495,29 @@ export class XmlDocument extends XmlDisposable { xmlXIncludeFreeContext(xinc); } } + + /** + * Canonicalize the document and invoke the handler to process. 
+ * + * @param handler handler that processes the content in the buffer + * @param options options to adjust the canonicalization behavior + * @see {@link canonicalizeDocument} + * @see {@link canonicalizeToString} + */ + canonicalize(handler: XmlOutputBufferHandler, options?: C14NOptions): void { + canonicalizeDocument(handler, this, options); + } + + /** + * Canonicalize the document to a string. + * + * @param options options to adjust the canonicalization behavior + * @returns The canonicalized XML string + * @see {@link canonicalize} + */ + canonicalizeToString(options?: C14NOptions): string { + const handler = new XmlStringOutputBufferHandler(); + this.canonicalize(handler, options); + return handler.result; + } } diff --git a/src/index.mts b/src/index.mts index cf302ea..c216ba2 100644 --- a/src/index.mts +++ b/src/index.mts @@ -52,4 +52,11 @@ export { readBuffer, closeBuffer, XmlBufferInputProvider, + XmlStringOutputBufferHandler, } from './utils.mjs'; +export { + XmlC14NMode, + type C14NOptions, + type SubtreeC14NOptions, + type XmlC14NIsVisibleCallback, +} from './c14n.mjs'; diff --git a/src/libxml2.mts b/src/libxml2.mts index e1dea8c..7b612b9 100644 --- a/src/libxml2.mts +++ b/src/libxml2.mts @@ -7,6 +7,7 @@ import type { XmlErrorPtr, XmlNodePtr, XmlNsPtr, + XmlOutputBufferPtr, XmlParserCtxtPtr, XmlSaveCtxtPtr, XmlXPathCompExprPtr, @@ -18,6 +19,12 @@ import { ContextStorage } from './utils.mjs'; const libxml2 = await moduleLoader(); libxml2._xmlInitParser(); +/** + * Export runtime functions needed by other modules. + * @internal + */ +export const { addFunction } = libxml2; + /** * The base class for exceptions in this library. * @@ -618,6 +625,45 @@ export function xmlSaveSetIndentString( return withStringUTF8(indent, (buf) => libxml2._xmlSaveSetIndentString(ctxt, buf)); } +/** + * Helper to create a C-style NULL-terminated array of C strings. 
+ * + * Allocates a single contiguous memory block containing: + * - First: the pointer array (n+1 pointers, last is NULL) + * - Then: the string data (all strings with null terminators) + * + * Memory layout: [ptr0][ptr1]...[ptrN][NULL][str0\0][str1\0]...[strN\0] + * + * @returns The pointer to the allocated memory. Caller must free with {@link free}. + */ +export function allocCStringArray(strings: string[]): Pointer { + // Calculate total size needed + const pointerArraySize = (strings.length + 1) * 4; // +1 for NULL terminator + const stringSizes = strings.map((s) => libxml2.lengthBytesUTF8(s) + 1); + const totalStringSize = stringSizes.reduce((sum, size) => sum + size, 0); + const totalSize = pointerArraySize + totalStringSize; + + // Allocate single block + const ptr = libxml2._malloc(totalSize); + + // Write strings and set pointers + let stringOffset = ptr + pointerArraySize; + const ptrArrayBase = ptr / libxml2.HEAP32.BYTES_PER_ELEMENT; + strings.forEach((s, i) => { + // Set pointer to this string + libxml2.HEAP32[ptrArrayBase + i] = stringOffset; + // Write the string + libxml2.stringToUTF8(s, stringOffset, stringSizes[i]); + stringOffset += stringSizes[i]; + }); + // NULL terminate the pointer array + libxml2.HEAP32[ptrArrayBase + strings.length] = 0; + + return ptr; +} + +export const free = libxml2._free; + export const xmlAddChild = libxml2._xmlAddChild; export const xmlAddNextSibling = libxml2._xmlAddNextSibling; export const xmlAddPrevSibling = libxml2._xmlAddPrevSibling; @@ -669,3 +715,17 @@ export const xmlXPathFreeContext = libxml2._xmlXPathFreeContext; export const xmlXPathFreeObject = libxml2._xmlXPathFreeObject; export const xmlXPathNewContext = libxml2._xmlXPathNewContext; export const xmlXPathSetContextNode = libxml2._xmlXPathSetContextNode; + +/** + * Create an output buffer using I/O callbacks (same pattern as xmlSaveToIO) + * @internal + */ +export function xmlOutputBufferCreateIO( + handler: XmlOutputBufferHandler, +): XmlOutputBufferPtr 
{ + const index = outputHandlerStorage.allocate(handler); // will be freed in outputClose + return libxml2._xmlOutputBufferCreateIO(outputWrite, outputClose, index, 0); +} + +export const xmlOutputBufferClose = libxml2._xmlOutputBufferClose; +export const xmlC14NExecute = libxml2._xmlC14NExecute; diff --git a/src/libxml2raw.d.mts b/src/libxml2raw.d.mts index b41f2f0..e0e3c09 100644 --- a/src/libxml2raw.d.mts +++ b/src/libxml2raw.d.mts @@ -26,6 +26,7 @@ type XmlXIncludeCtxtPtr = Pointer; type XmlXPathCompExprPtr = Pointer; type XmlXPathContextPtr = Pointer; type XmlXPathObjectPtr = Pointer; +type XmlOutputBufferPtr = Pointer; export class LibXml2 { HEAP32: Int32Array; @@ -160,6 +161,22 @@ export class LibXml2 { _xmlSchemaValidateDoc(ctx: XmlSchemaValidCtxtPtr, doc: XmlDocPtr): number; _xmlSchemaValidateOneElement(ctx: XmlSchemaValidCtxtPtr, elem: XmlNodePtr): number; _xmlUnlinkNode(cur: XmlNodePtr): void; + _xmlC14NExecute( + doc: XmlDocPtr, + is_visible_callback: Pointer, + user_data: Pointer, + mode: number, + inclusive_ns_prefixes: Pointer, + with_comments: number, + buf: Pointer, + ): number; + _xmlOutputBufferCreateIO( + iowrite: Pointer, + ioclose: Pointer, + ioctx: Pointer, + encoder: Pointer, + ): XmlOutputBufferPtr; + _xmlOutputBufferClose(out: XmlOutputBufferPtr): number; // runtime functions UTF8ToString(ptr: CString, maxBytesToRead?: number): string; addFunction(func: Function, sig: string): Pointer; diff --git a/src/nodes.mts b/src/nodes.mts index edab129..b977fce 100644 --- a/src/nodes.mts +++ b/src/nodes.mts @@ -42,6 +42,7 @@ import { import type { XmlDocPtr, XmlNodePtr, XmlNsPtr } from './libxml2raw.mjs'; import { XmlStringOutputBufferHandler } from './utils.mjs'; import { NamespaceMap, XmlXPath } from './xpath.mjs'; +import { canonicalizeSubtree, SubtreeC14NOptions } from './c14n.mjs'; function compiledXPathEval(nodePtr: XmlNodePtr, xpath: XmlXPath) { const context = xmlXPathNewContext(XmlNodeStruct.doc(nodePtr)); @@ -187,6 +188,31 @@ export 
abstract class XmlNode { return XmlNodeStruct.line(this._nodePtr); } + /** + * Canonicalize this node and its subtree to a buffer and invoke the handler to process. + * + * @param handler handler that processes the content in the buffer + * @param options options to adjust the canonicalization behavior + * @see {@link canonicalizeSubtree} + * @see {@link canonicalizeToString} + */ + canonicalize(handler: XmlOutputBufferHandler, options?: SubtreeC14NOptions): void { + canonicalizeSubtree(handler, this.doc, this, options); + } + + /** + * Canonicalize this node and its subtree and return the result as a string. + * + * @param options options to adjust the canonicalization behavior + * @returns The canonicalized XML string. + * @see {@link canonicalize} + */ + canonicalizeToString(options?: SubtreeC14NOptions): string { + const handler = new XmlStringOutputBufferHandler(); + this.canonicalize(handler, options); + return handler.result; + } + /** * Find the first descendant node matching the given compiled xpath selector * diff --git a/test/crossplatform/c14n.spec.mts b/test/crossplatform/c14n.spec.mts new file mode 100644 index 0000000..f87a94f --- /dev/null +++ b/test/crossplatform/c14n.spec.mts @@ -0,0 +1,410 @@ +import { assert, expect } from 'chai'; +import { + diag, + XmlDocument, + XmlC14NMode, + XmlStringOutputBufferHandler, + XmlElement, + XmlText, + XmlComment, + XmlCData, + XmlAttribute, + XmlEntityReference, +} from '@libxml2-wasm/lib/index.mjs'; +import { XmlTreeCommonStruct } from '@libxml2-wasm/lib/libxml2.mjs'; + +const usingXmlDocument = (doc: XmlDocument, cb: (doc: XmlDocument) => void) => { + diag.configure({ enabled: true }); + try { + cb(doc); + } finally { + doc.dispose(); + const report = diag.report(); + diag.configure({ enabled: false }); + expect(report).to.deep.equal({}); + } +}; + +describe('C14N (XML Canonicalization)', () => { + describe('canonicalizeDocument', () => { + it('should canonicalize a simple XML document', () => { + const 
xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal(xmlString); + }); + }); + + it('should order attributes', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + + it('should sort namespace declarations', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + + it('should remove whitespace between attributes', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + + it('should replace self-closing tags with full tags', () => { + const xmlString = ''; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal(''); + }); + }); + + it('should remove the XML declaration', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + }); + + describe('doc.canonicalize(handler, options)', () => { + it('should canonicalize document using handler API', () 
=> { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const handler = new XmlStringOutputBufferHandler(); + doc.canonicalize(handler); + expect(handler.result).to.equal(xmlString); + }); + }); + }); + + describe('canonicalizeSubtree', () => { + it('should canonicalize only a specific subtree', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); + assert(node instanceof XmlElement); + + const canonical = node.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + }); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + + it('should include inclusive namespaces with exclusive canonicalization', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const inclusiveNamespaces = ['ns3']; + const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); + assert(node instanceof XmlElement); + + const canonical = node.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + inclusiveNamespacePrefixes: inclusiveNamespaces, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + }); + + describe('canonicalizeNodeSet', () => { + it('should work with nodeset', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const nodes = doc.find('//ns1:child | //ns:sibling', { ns: 'uri:root', ns1: 'uri:ns1' }); + + expect(nodes).to.have.lengthOf(2); + + const nodeSet = new Set(nodes); + const canonical = doc.canonicalizeToString( + { mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, nodeSet }, + ); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('textother'); + }); + }); + }); + + describe('canonicalizeCallback', () => { + it('should work with isVisible callback', () => { + const xmlString = 'text'; + 
usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + isVisible: () => true, + }); + expect(canonical).to.equal('text'); + }); + }); + + it('should filter nodes with custom isVisible callback', () => { + const xmlString = 'preservedfiltered'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_1_0, + isVisible: (nodePtr: number) => { + // Filter out elements named 'remove' + const type = XmlTreeCommonStruct.type(nodePtr); + // Only check name for element nodes (type 1) + if (type === 1) { + const name = XmlTreeCommonStruct.name_(nodePtr); + return name !== 'remove'; + } + return true; // Include all other node types + }, + }); + expect(canonical).to.equal('preserved'); + }); + }); + + it('should throw error when both isVisible and nodeSet are provided', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const nodes = doc.find('//child'); + const nodeSet = new Set(nodes); + expect(() => doc.canonicalizeToString({ + isVisible: () => true, + nodeSet, + } as any)).to.throw('Cannot specify both isVisible and nodeSet'); + }); + }); + }); + + describe('canonicalization with comments', () => { + it('should exclude comments by default', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_1_0 }); + expect(canonical).to.equal('text'); + }); + }); + + it('should include comments when withComments is true', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_1_0, + withComments: true, + }); + expect(canonical).to.equal('text'); + }); + }); + }); + + describe('all C14N modes', () => { + const 
xmlString = 'text'; + + it('should canonicalize with XML_C14N_1_0', () => { + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_1_0 }); + expect(canonical).to.be.a('string'); + expect(canonical).to.include('xmlns'); + }); + }); + + it('should canonicalize with XML_C14N_EXCLUSIVE_1_0', () => { + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + }); + expect(canonical).to.be.a('string'); + expect(canonical).to.include('xmlns'); + }); + }); + + it('should canonicalize with XML_C14N_1_1', () => { + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const canonical = doc.canonicalizeToString({ mode: XmlC14NMode.XML_C14N_1_1 }); + expect(canonical).to.be.a('string'); + expect(canonical).to.include('xmlns'); + }); + }); + }); + + describe('node.toCanonicalString() / node.canonicalize()', () => { + it('should canonicalize a node subtree', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); + assert(node instanceof XmlElement); + + const canonical = node.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + }); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + + it('should work with default options', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//child'); + assert(node instanceof XmlElement); + + const canonical = node.canonicalizeToString(); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + + it('should support inclusive namespaces', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//ns1:child', { ns1: 
'uri:ns1' }); + assert(node instanceof XmlElement); + + const canonical = node.canonicalizeToString({ + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + inclusiveNamespacePrefixes: ['ns3'], + }); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + + it('should include comments when requested', () => { + const xmlString = 'text'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//child'); + assert(node instanceof XmlElement); + + const canonical = node.canonicalizeToString({ withComments: true }); + + expect(canonical).to.be.a('string'); + expect(canonical).to.equal('text'); + }); + }); + + it('should support canonicalize(handler, options) API', () => { + const xmlString = 'textother'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const node = doc.get('//ns1:child', { ns1: 'uri:ns1' }); + assert(node instanceof XmlElement); + + const handler = new XmlStringOutputBufferHandler(); + node.canonicalize(handler, { + mode: XmlC14NMode.XML_C14N_EXCLUSIVE_1_0, + }); + + expect(handler.result).to.be.a('string'); + expect(handler.result).to.equal('text'); + }); + }); + }); + + describe('canonicalize on different node types', () => { + it('should canonicalize XmlText node', () => { + const xmlString = 'Left & Right'; + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const textNode = doc.get('//child/text()'); + assert(textNode instanceof XmlText); + + const canonical = textNode.canonicalizeToString(); + expect(canonical).to.be.a('string'); + // Text node canonicalization includes the text content + expect(canonical).to.equal('Left & Right'); + }); + }); + + it('should canonicalize XmlComment', () => { + const xmlString = 'text'; + + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const commentNode = doc.get('//child/comment()'); + assert(commentNode instanceof XmlComment); + + // 1. 
Default Canonicalization (Comments are excluded) + const canonical = commentNode.canonicalizeToString(); + expect(canonical).to.equal(''); + + // 2. Canonicalization with Comments + const canonicalWithComments = commentNode.canonicalizeToString({ + withComments: true, + }); + // libxml2 canonicalizes comments with a trailing newline + expect(canonicalWithComments).to.equal('\n'); + }); + }); + + it('should canonicalize XmlCData', () => { + const xmlString = 'text'; + + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const cdataNode = doc.get('//child/text()'); + assert(cdataNode instanceof XmlCData); + + const canonical = cdataNode.canonicalizeToString(); + expect(canonical).to.be.a('string'); + + // C14N strips the wrapper and treats it as raw text + expect(canonical).to.equal('cdata content'); + }); + }); + + it('should canonicalize XmlAttribute', () => { + const xmlString = 'text'; + + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const attr = doc.get('//child/@attr1'); + assert(attr instanceof XmlAttribute); + + const canonical = attr.canonicalizeToString(); + expect(canonical).to.be.a('string'); + + // libxml2 canonicalizes attributes with leading space + expect(canonical).to.equal(' attr1="value1"'); + }); + }); + + it('should throw error when canonicalizing XmlEntityReference', () => { + const xmlString = ' ]>&myent;'; + + usingXmlDocument(XmlDocument.fromString(xmlString), (doc) => { + const entityRef = doc.root.firstChild; + assert(entityRef instanceof XmlEntityReference); + + /** + * C14N states that all entity references must be expanded. Therefore, the concept + * of canonicalizing an Entity Reference node as a distinct object is paradoxical + */ + expect(() => entityRef.canonicalizeToString()).to.throw('Failed to canonicalize XML document'); + }); + }); + }); +});