From 085ab36680f7d9ce8030f34f91b5d0aabb71bf7c Mon Sep 17 00:00:00 2001 From: calixteman Date: Mon, 16 Mar 2026 23:19:21 +0100 Subject: [PATCH 1/3] Don't throw when a mesh shading is degenerate The problem with throwing an error is that it removes an operation from the drawing list, which can cause a rendering issue. --- src/core/pattern.js | 9 +-------- src/display/pattern_helper.js | 24 ++++++++++++++---------- test/pdfs/.gitignore | 1 + test/pdfs/mesh_shading_empty.pdf | Bin 0 -> 1407 bytes test/test_manifest.json | 7 +++++++ 5 files changed, 23 insertions(+), 18 deletions(-) create mode 100644 test/pdfs/mesh_shading_empty.pdf diff --git a/src/core/pattern.js b/src/core/pattern.js index 6059fdfcf8f4a..62f67618afe13 100644 --- a/src/core/pattern.js +++ b/src/core/pattern.js @@ -968,20 +968,13 @@ class MeshShading extends BaseShading { } getIR() { - const { bounds } = this; - // Ensure that the shading has non-zero width and height, to prevent errors - // in `pattern_helper.js` (fixes issue17848.pdf). 
- if (bounds[2] - bounds[0] === 0 || bounds[3] - bounds[1] === 0) { - throw new FormatError(`Invalid MeshShading bounds: [${bounds}].`); - } - return [ "Mesh", this.shadingType, this.coords, this.colors, this.figures, - bounds, + this.bounds, this.bbox, this.background, ]; diff --git a/src/display/pattern_helper.js b/src/display/pattern_helper.js index 5a8da30a57b64..f66050d1bf495 100644 --- a/src/display/pattern_helper.js +++ b/src/display/pattern_helper.js @@ -462,16 +462,20 @@ class MeshShadingPattern extends BaseShadingPattern { const boundsWidth = Math.ceil(this._bounds[2]) - offsetX; const boundsHeight = Math.ceil(this._bounds[3]) - offsetY; - const width = Math.min( - Math.ceil(Math.abs(boundsWidth * combinedScale[0] * EXPECTED_SCALE)), - MAX_PATTERN_SIZE - ); - const height = Math.min( - Math.ceil(Math.abs(boundsHeight * combinedScale[1] * EXPECTED_SCALE)), - MAX_PATTERN_SIZE - ); - const scaleX = boundsWidth / width; - const scaleY = boundsHeight / height; + // Ensure that the pattern has a non-zero width and height, and non-zero + // scale factors, even when the bounds are degenerate (fixes issue17848.pdf). + const width = + Math.min( + Math.ceil(Math.abs(boundsWidth * combinedScale[0] * EXPECTED_SCALE)), + MAX_PATTERN_SIZE + ) || 1; + const height = + Math.min( + Math.ceil(Math.abs(boundsHeight * combinedScale[1] * EXPECTED_SCALE)), + MAX_PATTERN_SIZE + ) || 1; + const scaleX = boundsWidth ? boundsWidth / width : 1; + const scaleY = boundsHeight ? 
boundsHeight / height : 1; const context = { coords: this._coords, diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index 8e4b0a508fc9f..bcbcdd68afe3b 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -884,3 +884,4 @@ !form_two_pages.pdf !outlines_se.pdf !radial_gradients.pdf +!mesh_shading_empty.pdf diff --git a/test/pdfs/mesh_shading_empty.pdf b/test/pdfs/mesh_shading_empty.pdf new file mode 100644 index 0000000000000000000000000000000000000000..1bc16ce7e58a061f2f590426ff19f86b3bec6f8c GIT binary patch literal 1407 zcmdT^&61Nq5YBm?qAxjW7!t^)DwRVL<3TM`S#Mi=7@@;rWtr44YptI01|EC^eUv=d z+T9Sq;0s8>FnrTr|9sQTaJ$~T(yu3Kc>Z<%eg1L&sWb#&KYvm4xdIq{`gTMZt&BGZ zn=!C88I}-|(M~NE3X7DD`TLqAmP~ncFj{fk}&bV%k^L}Guy zG&f8840i#sq9A}U3dHXYOZ(&!N^d^rM7>8>RC5gz4o_$ZvCI|=PUK+S?a?GCR;#jQ z(K1yh$rhgCmvFfQ(w3>{7cy}BVA8zEAQC@Zi199`GFb2cWW4YAXS>#a z%@%i)Dz7H@(Hg(z3GUu5d10CRa*J+dt)qCxBi#H2KOSwtf_Bl(!DJP%#>A$`+J>a; zW#r`K5uKNf`fNjwpzpEB`~!?*uA%e}&7&F&1~1jqylBEVUG)ODP;Fi}6K_ykdDVgS zZgM5{w6E-xcssVZRyK^Ol&-dH*+%$JwLP6!!qbWMq>)~iIcqCOUt8|tw(@$<;z?)E zF8V@Js?knr150yf&8GA?%4#|93(Ug)aI_Lp2WinLRt)Q=pj& znhWA68b`2)2GcW;RVZ3)N=E}<9ZVDJ8XN0AHqq48SKK)6W7DV?R|i1{^UmZ4baZIa ikDh-=n89dgEuC`QlYEqv-Z(EcY8^&uIDGSQqy7M4!*;X) literal 0 HcmV?d00001 diff --git a/test/test_manifest.json b/test/test_manifest.json index 96245b451bed9..7dac82e9b3128 100644 --- a/test/test_manifest.json +++ b/test/test_manifest.json @@ -13998,5 +13998,12 @@ "md5": "80e8bed66b83928698f008c33de47edd", "rounds": 1, "type": "eq" + }, + { + "id": "mesh_shading_empty", + "file": "pdfs/mesh_shading_empty.pdf", + "md5": "5a1bf9cb73010d84b47d91bb66bae27d", + "rounds": 1, + "type": "eq" } ] From 32dc2a5894fbb1cc28a3587dccfe2c591d8683d8 Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Tue, 17 Mar 2026 15:30:57 +0100 Subject: [PATCH 2/3] Wait to have all the spans in the text layer before trying to set the caret in integration tests --- test/integration/test_utils.mjs | 4 ++++ 1 file changed, 4 insertions(+) diff --git 
a/test/integration/test_utils.mjs b/test/integration/test_utils.mjs index cf382f0983334..6bbb3e7568207 100644 --- a/test/integration/test_utils.mjs +++ b/test/integration/test_utils.mjs @@ -680,6 +680,10 @@ async function firstPageOnTop(page) { } async function setCaretAt(page, pageNumber, text, position) { + // Wait for the text layer to finish rendering before trying to find the span. + await page.waitForSelector( + `.page[data-page-number="${pageNumber}"] .textLayer .endOfContent` + ); await page.evaluate( (pageN, string, pos) => { for (const el of document.querySelectorAll( From e67892d035e22f284ada48950ee7efad8fcb9bab Mon Sep 17 00:00:00 2001 From: Calixte Denizet Date: Tue, 17 Mar 2026 17:40:01 +0100 Subject: [PATCH 3/3] Add support for saving outlines after reorganize/merge (bug 2009574) --- src/core/catalog.js | 19 +- src/core/editor/pdf_editor.js | 227 ++++++++++++++++++++ test/pdfs/.gitignore | 1 + test/pdfs/outlines_for_editor.pdf | Bin 0 -> 2982 bytes test/unit/api_spec.js | 336 ++++++++++++++++++++++++++++++ 5 files changed, 582 insertions(+), 1 deletion(-) create mode 100644 test/pdfs/outlines_for_editor.pdf diff --git a/src/core/catalog.js b/src/core/catalog.js index fcb5964987288..5ec4947f44e52 100644 --- a/src/core/catalog.js +++ b/src/core/catalog.js @@ -317,7 +317,7 @@ class Catalog { return shadow(this, "documentOutline", obj); } - #readDocumentOutline() { + #readDocumentOutline(options = {}) { let obj = this.#catDict.get("Outlines"); if (!(obj instanceof Dict)) { return null; @@ -382,6 +382,10 @@ class Catalog { items: [], }; + if (options.keepRawDict) { + outlineItem.rawDict = outlineDict; + } + i.parent.items.push(outlineItem); obj = outlineDict.getRaw("First"); if (obj instanceof Ref && !processed.has(obj)) { @@ -397,6 +401,19 @@ class Catalog { return root.items.length > 0 ? 
root.items : null; } + get documentOutlineForEditor() { + let obj = null; + try { + obj = this.#readDocumentOutline({ keepRawDict: true }); + } catch (ex) { + if (ex instanceof MissingDataException) { + throw ex; + } + warn("Unable to read document outline."); + } + return shadow(this, "documentOutlineForEditor", obj); + } + get permissions() { let permissions = null; try { diff --git a/src/core/editor/pdf_editor.js b/src/core/editor/pdf_editor.js index eed48c4e63e6d..8a726a496b148 100644 --- a/src/core/editor/pdf_editor.js +++ b/src/core/editor/pdf_editor.js @@ -70,6 +70,7 @@ class DocumentData { this.acroFormQ = 0; this.hasSignatureAnnotations = false; this.fieldToParent = new RefSetCache(); + this.outline = null; } } @@ -148,6 +149,8 @@ class PDFEditor { acroFormQ = 0; + outlineItems = null; + constructor({ useObjectStreams = true, title = "", author = "" } = {}) { [this.rootRef, this.rootDict] = this.newDict; [this.infoRef, this.infoDict] = this.newDict; @@ -633,6 +636,7 @@ class PDFEditor { promises.length = 0; this.#collectValidDestinations(allDocumentData); + this.#collectOutlineDestinations(allDocumentData); this.#collectPageLabels(); for (const page of this.oldPages) { @@ -650,6 +654,7 @@ class PDFEditor { this.#fixPostponedRefCopies(allDocumentData); await this.#mergeStructTrees(allDocumentData); await this.#mergeAcroForms(allDocumentData); + this.#buildOutline(allDocumentData); return this.writePDF(); } @@ -676,6 +681,9 @@ class PDFEditor { pdfManager .ensureCatalog("acroForm") .then(acroForm => (documentData.acroForm = acroForm)), + pdfManager + .ensureCatalog("documentOutlineForEditor") + .then(outline => (documentData.outline = outline)), ]); const structTreeRoot = documentData.structTreeRoot; if (structTreeRoot) { @@ -1214,6 +1222,224 @@ class PDFEditor { } } + /** + * Collect named destinations referenced in the outlines so they are kept + * when filtering duplicate named destinations. 
+ * @param {Array} allDocumentData + */ + #collectOutlineDestinations(allDocumentData) { + const collect = (items, destinations, usedNamedDestinations) => { + for (const item of items) { + if (typeof item.dest === "string" && destinations?.has(item.dest)) { + usedNamedDestinations.add(item.dest); + } + if (item.items.length > 0) { + collect(item.items, destinations, usedNamedDestinations); + } + } + }; + for (const documentData of allDocumentData) { + const { outline, destinations, usedNamedDestinations } = documentData; + if (outline?.length) { + collect(outline, destinations, usedNamedDestinations); + } + } + } + + /** + * Check whether an outline item has a valid destination in the output doc. + * @param {Object} item + * @param {DocumentData} documentData + * @returns {boolean} + */ + #isValidOutlineDest(item, documentData) { + const { dest, action, url, unsafeUrl, attachment, setOCGState } = item; + // External links (including relative URLs that can't be made absolute), + // named actions, attachments and OCG state changes are always kept. + if (action || url || unsafeUrl || attachment || setOCGState) { + return true; + } + if (!dest) { + return false; + } + if (typeof dest === "string") { + const name = documentData.dedupNamedDestinations.get(dest) || dest; + return this.namedDestinations.has(name); + } + if (Array.isArray(dest) && dest[0] instanceof Ref) { + return !!documentData.oldRefMapping.get(dest[0]); + } + return false; + } + + /** + * Recursively filter outline items, removing those with no valid destination + * and no remaining children. 
+ * @param {Array} items + * @param {DocumentData} documentData + * @returns {Array} + */ + #filterOutlineItems(items, documentData) { + const result = []; + for (const item of items) { + const filteredChildren = this.#filterOutlineItems( + item.items, + documentData + ); + const hasValidOwnDest = this.#isValidOutlineDest(item, documentData); + if (hasValidOwnDest || filteredChildren.length > 0) { + result.push({ + ...item, + // When the item's own destination is invalid (but it has surviving + // children), clear the destination so the output item is a plain + // container rather than a broken link. + dest: hasValidOwnDest ? item.dest : null, + items: filteredChildren, + _documentData: documentData, + }); + } + } + return result; + } + + /** + * Filter outline trees and collect the result into this.outlineItems. + * Must be called after page copies are made (oldRefMapping is populated). + * @param {Array} allDocumentData + */ + #buildOutline(allDocumentData) { + const outlineItems = []; + for (const documentData of allDocumentData) { + const { outline } = documentData; + if (!outline?.length) { + continue; + } + outlineItems.push(...this.#filterOutlineItems(outline, documentData)); + } + this.outlineItems = outlineItems.length > 0 ? outlineItems : null; + } + + /** + * Write the destination or action of an outline item into the given dict. 
+ * @param {Dict} itemDict + * @param {Object} item + * @returns {Promise} + */ + async #setOutlineItemDest(itemDict, item) { + const { dest, rawDict } = item; + const documentData = item._documentData; + if (dest) { + if (typeof dest === "string") { + const name = documentData.dedupNamedDestinations.get(dest) || dest; + itemDict.set("Dest", stringToAsciiOrUTF16BE(name)); + } else if (Array.isArray(dest)) { + const newDest = dest.slice(); + if (newDest[0] instanceof Ref) { + newDest[0] = documentData.oldRefMapping.get(newDest[0]) || newDest[0]; + } + itemDict.set("Dest", newDest); + } + return; + } + // For all other action types (URI, GoToR, Named, SetOCGState, ...) clone + // the raw action dict from the original document. + const actionDict = rawDict?.get("A"); + if (actionDict instanceof Dict) { + this.currentDocument = documentData; + const actionRef = await this.#cloneObject( + actionDict, + documentData.document.xref + ); + this.currentDocument = null; + itemDict.set("A", actionRef); + } + } + + /** + * Build and write the document outline (bookmarks) into the output PDF. + * @returns {Promise} + */ + async #makeOutline() { + const { outlineItems } = this; + if (!outlineItems?.length) { + return; + } + + const [outlineRootRef, outlineRootDict] = this.newDict; + outlineRootDict.setIfName("Type", "Outlines"); + + // First pass: allocate a new Ref for every item in the tree. + const assignRefs = items => { + for (const item of items) { + [item._ref] = this.newDict; + if (item.items.length > 0) { + assignRefs(item.items); + } + } + }; + assignRefs(outlineItems); + + // Second pass: fill each Dict and return the total visible item count. 
+ const fillItems = async (items, parentRef) => { + let totalCount = 0; + for (let i = 0; i < items.length; i++) { + const item = items[i]; + const dict = this.xref[item._ref.num]; + + dict.set("Title", stringToAsciiOrUTF16BE(item.title)); + dict.set("Parent", parentRef); + if (i > 0) { + dict.set("Prev", items[i - 1]._ref); + } + if (i < items.length - 1) { + dict.set("Next", items[i + 1]._ref); + } + + if (item.items.length > 0) { + dict.set("First", item.items[0]._ref); + dict.set("Last", item.items.at(-1)._ref); + const childCount = await fillItems(item.items, item._ref); + if (item.count !== undefined) { + // Preserve the original expanded/collapsed state while updating + // the number of visible descendants after filtering. + dict.set("Count", item.count < 0 ? -childCount : childCount); + } + // A closed item (count < 0) hides its descendants, so it only + // contributes 1 to the parent's visible-item tally. + totalCount += + item.count !== undefined && item.count < 0 ? 1 : childCount + 1; + } else { + totalCount += 1; + } + + await this.#setOutlineItemDest(dict, item); + + const flags = (item.bold ? 2 : 0) | (item.italic ? 
1 : 0); + if (flags !== 0) { + dict.set("F", flags); + } + if ( + item.color && + (item.color[0] !== 0 || item.color[1] !== 0 || item.color[2] !== 0) + ) { + dict.set("C", [ + item.color[0] / 255, + item.color[1] / 255, + item.color[2] / 255, + ]); + } + } + return totalCount; + }; + + const totalCount = await fillItems(outlineItems, outlineRootRef); + outlineRootDict.set("First", outlineItems[0]._ref); + outlineRootDict.set("Last", outlineItems.at(-1)._ref); + outlineRootDict.set("Count", totalCount); + + this.rootDict.set("Outlines", outlineRootRef); + } + async #mergeAcroForms(allDocumentData) { this.#setAcroFormDefaultBasicValues(allDocumentData); this.#setAcroFormDefaultAppearance(allDocumentData); @@ -1937,6 +2163,7 @@ class PDFEditor { this.#makePageLabelsTree(); this.#makeDestinationsTree(); this.#makeStructTree(); + await this.#makeOutline(); } /** diff --git a/test/pdfs/.gitignore b/test/pdfs/.gitignore index bcbcdd68afe3b..c00337faa960d 100644 --- a/test/pdfs/.gitignore +++ b/test/pdfs/.gitignore @@ -884,4 +884,5 @@ !form_two_pages.pdf !outlines_se.pdf !radial_gradients.pdf +!outlines_for_editor.pdf !mesh_shading_empty.pdf diff --git a/test/pdfs/outlines_for_editor.pdf b/test/pdfs/outlines_for_editor.pdf new file mode 100644 index 0000000000000000000000000000000000000000..0138d900d8a9cc7da7ef8e94bd999a54cc7a613e GIT binary patch literal 2982 zcmcgu+iu!G5PkPo%u6Klkl5?R7^)~z3ZboBYV@*{9Dye>MwNm zV!Za+FD)t{qup6%&YU@C#lh8NdPI&x=iv9RpSzDkJn$g9|KgmSfP4L8!@#|Yp5!xU zYYsk!=iq)|aT2}Gws0%X4~P%p$iIWrQ-`f%@pD4sxOm>X=jlT?8FK8Xvsl|!9_oC$uZ84{J0$Xs{ejDSNN*=Bgo-*i3*AO@Y zw%w%3L&8B}LOV$#X=F3D#mt(939{BoTIDNww5^nnrPe5xxV_F*&c5y1QAIVkvZy%~ zqgYK>s;~1cXZbox;bwkuD8y=ZqpIj`R24m;GpH_rM{mjbrg-JL ztL!C7)95(MpAL~?lVwwnnq^r-QXR>p9f)+R3Xx{M1Cfw%4DJj+I2xI{j-sX-#mStl zGR^>RR6u`z?Es;fP#~0(+zt?Lvg-_GH-%otmn=Wt#E+PCB5qq$CKn<$i7{=ci5V}W z4GyXUN=e*lR312@CJK?Kq0Ct()d(A@k`6GM3gtMeS(1(`7rGiT zs!3FhY73@>MXEx+PA*BOb(`rG^7$5shgVZ2e0j1XyKiTgvM+g3r7UylD-CB#@vcRe 
zBU|;FEoNIh-xsQ>uA*j3(!~1eD~G2it?1N?Hac6pHC`rZ4Br#JY+NbjMOZ0D7c~fM zamj?S4J4ecs$NPtRhjh7US-Ua;Qn^#kmwu@}&h zwHFRr&V}}#$lk2`PkY41>&Cz7S}mH|qTd%q<)YDH^|lWz@H@5HCQ1*Uu3$ zLY*}Bs6Vv!`u2Ruz_WS5RDU}o5)vyP8Hzti4SXZtI_G6?)%=*IXF1KoH~C1kJ9-J literal 0 HcmV?d00001 diff --git a/test/unit/api_spec.js b/test/unit/api_spec.js index a16e458787b56..c7bdfe23e6ef0 100644 --- a/test/unit/api_spec.js +++ b/test/unit/api_spec.js @@ -6330,5 +6330,341 @@ small scripts as well as for`); await loadingTask.destroy(); }); }); + + describe("Outlines", function () { + // outlines_for_editor.pdf has 5 pages and the following outline tree: + // + // [0] "Page 1 - explicit dest" dest=[page1 /XYZ 0 0 0] + // [1] "Page 2 - named dest" dest=(page2dest) + // [2] "External URL" /A /URI https://mozilla.org + // [3] "Next Page action" /A /Named /NextPage + // [4] "Remote PDF link" /A /GoToR other.pdf + // [5] "Chapter" dest=(page1dest) + // [5.0] "Section 1" dest=[page2 /FitH 100] + // [5.1] "Section 2" dest=(page3dest) bold+italic, red + // [5.2] "Subsection" dest=(page5dest) + // [5.2.0] "Deep item" dest=(page4dest) + // [6] "No dest parent" (no dest / action) + // [6.0] "Child with dest" dest=(page5dest) + + it("should preserve the full outline when all pages are kept", async function () { + const loadingTask = getDocument( + buildGetDocumentParams("outlines_for_editor.pdf") + ); + const pdfDoc = await loadingTask.promise; + const originalOutline = await pdfDoc.getOutline(); + const data = await pdfDoc.extractPages([{ document: null }]); + await loadingTask.destroy(); + + const newLoadingTask = getDocument(data); + const newPdfDoc = await newLoadingTask.promise; + const outline = await newPdfDoc.getOutline(); + + expect(Array.isArray(outline)).toEqual(true); + expect(outline.length).toEqual(7); + + // Item [0]: explicit array dest + expect(outline[0].title).toEqual("Page 1 - explicit dest"); + expect(Array.isArray(outline[0].dest)).toEqual(true); + expect(outline[0].dest[1].name).toEqual("XYZ"); + + 
// Item [1]: named string dest + expect(outline[1].title).toEqual("Page 2 - named dest"); + expect(typeof outline[1].dest).toEqual("string"); + + // Item [2]: URI action + expect(outline[2].title).toEqual("External URL"); + expect(outline[2].dest).toEqual(null); + expect(outline[2].url).toEqual("https://mozilla.org/"); + + // Item [3]: built-in named action + expect(outline[3].title).toEqual("Next Page action"); + expect(outline[3].dest).toEqual(null); + expect(outline[3].action).toEqual("NextPage"); + + // Item [4]: GoToR (remote PDF) – relative path, so url is null but + // unsafeUrl holds the raw file path (with dest hash appended). + expect(outline[4].title).toEqual("Remote PDF link"); + expect(outline[4].dest).toEqual(null); + expect(outline[4].unsafeUrl).toContain("other.pdf"); + + // Item [5]: "Chapter" – parent with named dest and 3 children + const chapter = outline[5]; + expect(chapter.title).toEqual("Chapter"); + expect(typeof chapter.dest).toEqual("string"); + expect(chapter.items.length).toEqual(3); + expect(chapter.count).toEqual(originalOutline[5].count); + + // Section 1: explicit FitH dest + expect(chapter.items[0].title).toEqual("Section 1"); + expect(Array.isArray(chapter.items[0].dest)).toEqual(true); + expect(chapter.items[0].dest[1].name).toEqual("FitH"); + + // Section 2: named dest + bold + italic + red color + const section2 = chapter.items[1]; + expect(section2.title).toEqual("Section 2"); + expect(typeof section2.dest).toEqual("string"); + expect(section2.bold).toEqual(true); + expect(section2.italic).toEqual(true); + expect(section2.color).toEqual(new Uint8ClampedArray([255, 0, 0])); + + // Subsection: parent with own dest + one child + const subsection = chapter.items[2]; + expect(subsection.title).toEqual("Subsection"); + expect(subsection.items.length).toEqual(1); + expect(subsection.items[0].title).toEqual("Deep item"); + + // Item [6]: "No dest parent" – no dest, but has a child + const noDestParent = outline[6]; + 
expect(noDestParent.title).toEqual("No dest parent"); + expect(noDestParent.dest).toEqual(null); + expect(noDestParent.items.length).toEqual(1); + expect(noDestParent.count).toEqual(originalOutline[6].count); + expect(noDestParent.items[0].title).toEqual("Child with dest"); + + await newLoadingTask.destroy(); + }); + + it("should filter outline items pointing to deleted pages", async function () { + // Keep only pages 0 and 1 (page 1 and page 2). + const loadingTask = getDocument( + buildGetDocumentParams("outlines_for_editor.pdf") + ); + const pdfDoc = await loadingTask.promise; + const data = await pdfDoc.extractPages([ + { document: null, includePages: [0, 1] }, + ]); + await loadingTask.destroy(); + + const newLoadingTask = getDocument(data); + const newPdfDoc = await newLoadingTask.promise; + const outline = await newPdfDoc.getOutline(); + + expect(Array.isArray(outline)).toEqual(true); + // 6 items: all except "No dest parent" (its child dest was on page 5). + expect(outline.length).toEqual(6); + + const titles = outline.map(i => i.title); + expect(titles).not.toContain("No dest parent"); + + // "Chapter" is kept (own dest=page1dest points to kept page 1); + // it should have only "Section 1" – "Section 2" (page3) and + // "Subsection" (page5 / page4) are gone. + const chapter = outline.find(i => i.title === "Chapter"); + expect(chapter).not.toBeUndefined(); + expect(chapter.items.length).toEqual(1); + expect(chapter.items[0].title).toEqual("Section 1"); + + // External links are always preserved. + expect(titles).toContain("External URL"); + expect(titles).toContain("Next Page action"); + expect(titles).toContain("Remote PDF link"); + + await newLoadingTask.destroy(); + }); + + it("should keep parent items that have no dest but still have valid children", async function () { + // Keep only pages 2-4 (page 3, 4, 5). 
+ const loadingTask = getDocument( + buildGetDocumentParams("outlines_for_editor.pdf") + ); + const pdfDoc = await loadingTask.promise; + const data = await pdfDoc.extractPages([ + { document: null, includePages: [2, 3, 4] }, + ]); + await loadingTask.destroy(); + + const newLoadingTask = getDocument(data); + const newPdfDoc = await newLoadingTask.promise; + const outline = await newPdfDoc.getOutline(); + + expect(Array.isArray(outline)).toEqual(true); + // 5 items: explicit dest (page1) and named dest (page2dest) are gone; + // the 3 external-link items + "Chapter" + "No dest parent" remain. + expect(outline.length).toEqual(5); + + const titles = outline.map(i => i.title); + expect(titles).not.toContain("Page 1 - explicit dest"); + expect(titles).not.toContain("Page 2 - named dest"); + + // "Chapter" has no valid own dest (page1dest deleted) but has + // surviving children, so it must be kept. + const chapter = outline.find(i => i.title === "Chapter"); + expect(chapter).not.toBeUndefined(); + expect(chapter.dest).toEqual(null); + expect(chapter.items.length).toEqual(2); + + const childTitles = chapter.items.map(i => i.title); + expect(childTitles).toContain("Section 2"); + expect(childTitles).toContain("Subsection"); + expect(childTitles).not.toContain("Section 1"); + + const subsection = chapter.items.find(i => i.title === "Subsection"); + expect(subsection.items.length).toEqual(1); + expect(subsection.items[0].title).toEqual("Deep item"); + + // "No dest parent" has a surviving child (page5dest on kept page 5). + const noDestParent = outline.find(i => i.title === "No dest parent"); + expect(noDestParent).not.toBeUndefined(); + expect(noDestParent.items.length).toEqual(1); + + await newLoadingTask.destroy(); + }); + + it("should merge outlines from two copies, cross-linking surviving dests", async function () { + // Merge: page 1 (index 0) from copy A, page 3 (index 2) from copy B. 
+ // Named dests in the output: "page1dest" → merged page 1 (copy A p1), + // "page3dest" → merged page 2 (copy B p3). + // + // Copy A contributes (page 1 kept): + // "Page 1 - explicit dest" – explicit dest to kept page + // "External URL" / "Next Page action" / "Remote PDF link" – external + // "Chapter" (dest=page1dest) with only child "Section 2" + // Section 2 (dest=page3dest) survives because page3dest is valid + // (points to copy B's page 3 in the merged doc). + // + // Copy B contributes (page 3 kept): + // "External URL" / "Next Page action" / "Remote PDF link" – external + // "Chapter" (dest=page1dest) with only child "Section 2" + // Copy B's "Chapter" has dest=page1dest which happens to be valid + // in the merged doc (copy A's page 1), so it cross-links there. + const loadingTask = getDocument( + buildGetDocumentParams("outlines_for_editor.pdf") + ); + const pdfDoc = await loadingTask.promise; + const pdfDataB = await DefaultFileReaderFactory.fetch({ + path: TEST_PDFS_PATH + "outlines_for_editor.pdf", + }); + + const data = await pdfDoc.extractPages([ + { document: null, includePages: [0] }, + { document: pdfDataB, includePages: [2] }, + ]); + await loadingTask.destroy(); + + const newLoadingTask = getDocument(data); + const newPdfDoc = await newLoadingTask.promise; + expect(newPdfDoc.numPages).toEqual(2); + + const outline = await newPdfDoc.getOutline(); + expect(Array.isArray(outline)).toEqual(true); + // 5 items from copy A + 4 items from copy B = 9 total. 
+ expect(outline.length).toEqual(9); + + // ---- Copy A items ---- + expect(outline[0].title).toEqual("Page 1 - explicit dest"); + expect(Array.isArray(outline[0].dest)).toEqual(true); + expect(outline[1].title).toEqual("External URL"); + expect(outline[2].title).toEqual("Next Page action"); + expect(outline[3].title).toEqual("Remote PDF link"); + + // "Chapter" from copy A: own dest (page1dest) is valid; the only + // surviving child is "Section 2" whose dest (page3dest) cross-links + // to copy B's page (merged page 2). + const chapterA = outline[4]; + expect(chapterA.title).toEqual("Chapter"); + expect(typeof chapterA.dest).toEqual("string"); // page1dest + expect(chapterA.items.length).toEqual(1); + expect(chapterA.items[0].title).toEqual("Section 2"); + expect(typeof chapterA.items[0].dest).toEqual("string"); // page3dest + + // ---- Copy B items ---- + expect(outline[5].title).toEqual("External URL"); + expect(outline[6].title).toEqual("Next Page action"); + expect(outline[7].title).toEqual("Remote PDF link"); + + // "Chapter" from copy B: its original dest (page1dest) resolves to + // copy A's page 1 after merging, so it is kept (cross-document link). + const chapterB = outline[8]; + expect(chapterB.title).toEqual("Chapter"); + expect(typeof chapterB.dest).toEqual("string"); // page1dest → copy A p1 + expect(chapterB.items.length).toEqual(1); + expect(chapterB.items[0].title).toEqual("Section 2"); + expect(typeof chapterB.items[0].dest).toEqual("string"); // page3dest + + // "Page 1 - explicit dest" from copy B should be absent (copy B's + // page 1 was not kept). + const titles = outline.map(i => i.title); + expect(titles.indexOf("Page 1 - explicit dest")).toEqual(0); + expect(titles.lastIndexOf("Page 1 - explicit dest")).toEqual(0); + + // Neither copy contributes "Page 2 - named dest" or "No dest parent". 
+ expect(titles).not.toContain("Page 2 - named dest"); + expect(titles).not.toContain("No dest parent"); + + await newLoadingTask.destroy(); + }); + + it("should produce no outline when the source PDF has none", async function () { + // tracemonkey.pdf has no outline at all. + const loadingTask = getDocument(tracemonkeyGetDocumentParams); + const pdfDoc = await loadingTask.promise; + const data = await pdfDoc.extractPages([{ document: null }]); + await loadingTask.destroy(); + + const newLoadingTask = getDocument(data); + const newPdfDoc = await newLoadingTask.promise; + const outline = await newPdfDoc.getOutline(); + + expect(outline).toEqual(null); + + await newLoadingTask.destroy(); + }); + + it("should rename conflicting named dests when both copies keep the page", async function () { + // Merge page 1 (index 0) from copy A with page 1 (index 0) from copy B + // (same PDF). Both copies have "page1dest" pointing to their page 1, + // and both pages are kept. The deduplication logic must rename the + // second occurrence so both named dests survive in the output. + const loadingTask = getDocument( + buildGetDocumentParams("outlines_for_editor.pdf") + ); + const pdfDoc = await loadingTask.promise; + const pdfDataB = await DefaultFileReaderFactory.fetch({ + path: TEST_PDFS_PATH + "outlines_for_editor.pdf", + }); + + const data = await pdfDoc.extractPages([ + { document: null, includePages: [0] }, + { document: pdfDataB, includePages: [0] }, + ]); + await loadingTask.destroy(); + + const newLoadingTask = getDocument(data); + const newPdfDoc = await newLoadingTask.promise; + expect(newPdfDoc.numPages).toEqual(2); + + const outline = await newPdfDoc.getOutline(); + expect(Array.isArray(outline)).toEqual(true); + // Copy A: "Page 1 - explicit dest", "External URL", "Next Page + // action", "Remote PDF link", "Chapter" (dest=page1dest) + // Copy B: same 5 items but "Chapter" dest is renamed. 
+ expect(outline.length).toEqual(10); + + // The "Chapter" items from the two copies must have different dest + // strings: one with the original "page1dest" and one with the renamed + // version (contains a suffix to avoid collisions). + const chapterItems = outline.filter(i => i.title === "Chapter"); + expect(chapterItems.length).toEqual(2); + const chapterDests = chapterItems.map(i => i.dest); + expect(chapterDests[0]).not.toEqual(chapterDests[1]); + // One of them is the original name. + expect(chapterDests.includes("page1dest")).toEqual(true); + // The other is a renamed version that still exists in the doc. + const renamedDest = chapterDests.find(d => d !== "page1dest"); + expect(typeof renamedDest).toEqual("string"); + + // Verify the "Page 1 - explicit dest" items: copy A uses an array dest + // pointing to its page, copy B uses its renamed page ref. + const page1Items = outline.filter( + i => i.title === "Page 1 - explicit dest" + ); + expect(page1Items.length).toEqual(2); + expect(Array.isArray(page1Items[0].dest)).toEqual(true); + expect(Array.isArray(page1Items[1].dest)).toEqual(true); + + await newLoadingTask.destroy(); + }); + }); }); });