From e40ce05467f420db0482488aec728df772bc0dc8 Mon Sep 17 00:00:00 2001 From: Harold Hunt Date: Mon, 20 May 2024 22:22:57 -0400 Subject: [PATCH 1/9] Issue-423 - Failing test when SitemapStream error --- tests/sitemap-index.test.ts | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) diff --git a/tests/sitemap-index.test.ts b/tests/sitemap-index.test.ts index 64937b7..6098315 100644 --- a/tests/sitemap-index.test.ts +++ b/tests/sitemap-index.test.ts @@ -1,6 +1,6 @@ import { SitemapStream } from '../index'; import { tmpdir } from 'os'; -import { resolve } from 'path'; +import { resolve, join } from 'path'; import { existsSync, unlinkSync, @@ -12,6 +12,7 @@ import { SitemapAndIndexStream, } from '../lib/sitemap-index-stream'; import { streamToPromise } from '../dist'; +import { finished } from 'node:stream/promises'; import { WriteStream } from 'node:fs'; /* eslint-env jest, jasmine */ function removeFilesArray(files): void { @@ -175,4 +176,37 @@ describe('sitemapAndIndex', () => { ); expect(xml.toString()).toContain('https://1.example.com/a'); }); + + it('propagates error from sitemap stream that cannot be written', async () => { + const baseURL = 'https://example.com/sub/'; + + const sms = new SitemapAndIndexStream({ + limit: 1, + getSitemapStream: (i: number): [string, SitemapStream, WriteStream] => { + const sm = new SitemapStream(); + const path = `./sitemap-${i}.xml`; + + // This will not throw even though it will fail + // `outputStream.writable === true` + // `outputStream.closed === false` + const outputStream = createWriteStream( + resolve(join(targetFolder, 'does', 'not', 'exist'), path) + ); + + const ws = sm.pipe(outputStream); + return [new URL(path, baseURL).toString(), sm, ws]; + }, + }); + sms.write('https://1.example.com/a'); + sms.write('https://2.example.com/a'); + sms.write('https://3.example.com/a'); + sms.write('https://4.example.com/a'); + sms.end(); + await finished(sms); + expect( + existsSync( + resolve(join(targetFolder, 'does', 'not', 'exist'), `./sitemap-0.xml`) + ) + ).toBe(false); + }); }); From f339f7bb3ae0a1fff2d276608f5d058213f98570 Mon Sep 17 00:00:00 2001 From: Harold Hunt Date: Mon, 20 May 2024 22:51:58 -0400 Subject: [PATCH 2/9] Issue 423 - Propagate SitemapStream error --- lib/sitemap-index-stream.ts | 2 ++ tests/sitemap-index.test.ts | 11 ++++++++++- 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/lib/sitemap-index-stream.ts b/lib/sitemap-index-stream.ts index 16160e9..c75d966 100644 --- a/lib/sitemap-index-stream.ts +++ b/lib/sitemap-index-stream.ts @@ -99,6 +99,7 @@ export class SitemapAndIndexStream extends SitemapIndexStream { this.getSitemapStream = opts.getSitemapStream; [this.idxItem, this.currentSitemap, this.currentSitemapPipeline] = this.getSitemapStream(0); + this.currentSitemap.on('error', (err) => this.emit('error', err)); this.limit = opts.limit ?? 45000; } @@ -124,6 +125,7 @@ export class SitemapAndIndexStream extends SitemapIndexStream { const onFinish = () => { const [idxItem, currentSitemap, currentSitemapPipeline] = this.getSitemapStream(this.i / this.limit); + currentSitemap.on('error', (err) => this.emit('error', err)); this.currentSitemap = currentSitemap; this.currentSitemapPipeline = currentSitemapPipeline; // push to index stream diff --git a/tests/sitemap-index.test.ts b/tests/sitemap-index.test.ts index 6098315..0b1e83d 100644 --- a/tests/sitemap-index.test.ts +++ b/tests/sitemap-index.test.ts @@ -193,6 +193,12 @@ describe('sitemapAndIndex', () => { resolve(join(targetFolder, 'does', 'not', 'exist'), path) ); + // Streams do not automatically propagate errors + // We must propagate this up to the SitemapStream + outputStream.on('error', (err) => { + sm.emit('error', err); + }); + const ws = sm.pipe(outputStream); return [new URL(path, baseURL).toString(), sm, ws]; }, @@ -202,7 +208,10 @@ describe('sitemapAndIndex', () => { sms.write('https://3.example.com/a'); sms.write('https://4.example.com/a'); sms.end(); - await finished(sms); + await expect(finished(sms)).rejects.toThrow( + 'ENOENT: no such file or directory, open' + ); + expect( existsSync( resolve(join(targetFolder, 'does', 'not', 'exist'), `./sitemap-0.xml`) From 6deb61430dfe52ebc289132bbf1a75ae27bd5ad7 Mon Sep 17 00:00:00 2001 From: Harold Hunt Date: Mon, 20 May 2024 23:14:01 -0400 Subject: [PATCH 3/9] Issue 423 - Fix node 14 compatibility --- tests/sitemap-index.test.ts | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/sitemap-index.test.ts b/tests/sitemap-index.test.ts index 0b1e83d..8a207c9 100644 --- a/tests/sitemap-index.test.ts +++ b/tests/sitemap-index.test.ts @@ -12,8 +12,12 @@ import { SitemapAndIndexStream, } from '../lib/sitemap-index-stream'; import { streamToPromise } from '../dist'; -import { finished } from 'node:stream/promises'; +import { finished as finishedCallback } from 'stream'; import { WriteStream } from 'node:fs'; +import { promisify } from 'util'; + +const finished = promisify(finishedCallback); + /* eslint-env jest, jasmine */ function removeFilesArray(files): void { if (files && files.length) { From 510925cd1f67b54c794b94d31523602e01c2700f Mon Sep 17 00:00:00 2001 From: Harold Hunt Date: Tue, 21 May 2024 20:23:18 -0400 Subject: [PATCH 4/9] Issue-423 - SitemapIndexAndStream tests --- tests/sitemap-index.test.ts | 303 +++++++++++++++++++++++++++++++++++- 1 file changed, 299 insertions(+), 4 deletions(-) diff --git a/tests/sitemap-index.test.ts b/tests/sitemap-index.test.ts index 8a207c9..8fd2284 100644 --- a/tests/sitemap-index.test.ts +++ b/tests/sitemap-index.test.ts @@ -1,6 +1,6 @@ import { SitemapStream } from '../index'; import { tmpdir } from 'os'; -import { resolve, join } from 'path'; +import { join, resolve } from 'path'; import { existsSync, unlinkSync, @@ -11,9 +11,9 @@ import { SitemapIndexStream, SitemapAndIndexStream, } from '../lib/sitemap-index-stream'; -import { streamToPromise } from '../dist'; +import { streamToPromise } from '../lib/sitemap-stream'; import { finished as finishedCallback } from 'stream'; -import { WriteStream } from 'node:fs'; +import { readFileSync, WriteStream } from 'node:fs'; import { promisify } from 'util'; const finished = promisify(finishedCallback); @@ -134,6 +134,8 @@ describe('sitemapAndIndex', () => { resolve(targetFolder, `./sitemap-1.xml`), resolve(targetFolder, `./sitemap-2.xml`), resolve(targetFolder, `./sitemap-3.xml`), + resolve(targetFolder, `./sitemap-4.xml`), + resolve(targetFolder, `./sitemap-index.xml`), ]); }); @@ -143,6 +145,8 @@ describe('sitemapAndIndex', () => { resolve(targetFolder, `./sitemap-1.xml`), resolve(targetFolder, `./sitemap-2.xml`), resolve(targetFolder, `./sitemap-3.xml`), + resolve(targetFolder, `./sitemap-4.xml`), + resolve(targetFolder, `./sitemap-index.xml`), ]); }); @@ -155,7 +159,15 @@ describe('sitemapAndIndex', () => { const sm = new SitemapStream(); const path = `./sitemap-${i}.xml`; - const ws = sm.pipe(createWriteStream(resolve(targetFolder, path))); + const outputStream = createWriteStream(resolve(targetFolder, path)); + + // Streams do not automatically propagate errors + // We must propagate this up to the SitemapStream + outputStream.on('error', (err) => { + sm.emit('error', err); + }); + + const ws = sm.pipe(outputStream); return [new URL(path, baseURL).toString(), sm, ws]; }, }); @@ -222,4 +234,287 @@ describe('sitemapAndIndex', () => { ) ).toBe(false); }); + + it('writes to index file', async () => { + const baseURL = 'https://example.com/sub/'; + + const sms = new SitemapAndIndexStream({ + limit: 2, + getSitemapStream: (i: number): [string, SitemapStream, WriteStream] => { + const sm = new SitemapStream(); + const path = `./sitemap-${i}.xml`; + + // This will not throw even though it will fail + // `outputStream.writable === true` + // `outputStream.closed === false` + const outputStream = createWriteStream(resolve(targetFolder, path)); + + // Streams do not automatically propagate errors + // We must propagate this up to the SitemapStream + outputStream.on('error', (err) => { + sm.emit('error', err); + }); + + const ws = sm.pipe(outputStream); + return [new URL(path, baseURL).toString(), sm, ws]; + }, + }); + + // Pipe the index stream to a file + const indexStream = createWriteStream( + resolve(targetFolder, `./sitemap-index.xml`) + ); + sms.pipe(indexStream); + await writeData(sms, 'https://1.example.com/a'); + await writeData(sms, 'https://2.example.com/a'); + await writeData(sms, 'https://3.example.com/a'); + sms.end(); + await expect(finished(sms)).resolves.toBeUndefined(); + + await finished(indexStream); + + expect(existsSync(resolve(targetFolder, `./sitemap-index.xml`))).toBe(true); + expect(existsSync(resolve(targetFolder, `./sitemap-0.xml`))).toBe(true); + expect(existsSync(resolve(targetFolder, `./sitemap-1.xml`))).toBe(true); + expect(existsSync(resolve(targetFolder, `./sitemap-2.xml`))).toBe(false); + + // Read the first sitemap to make sure it was written + const sitemap0 = await streamToPromise( + createReadStream(resolve(targetFolder, `./sitemap-0.xml`)) + ); + expect(sitemap0.toString()).toContain('https://1.example.com/a'); + + // Read the last sitemap to make sure it was written + const sitemap1 = await streamToPromise( + createReadStream(resolve(targetFolder, `./sitemap-1.xml`)) + ); + expect(sitemap1.toString()).toContain('https://3.example.com/a'); + + // Read the index to make sure it was written + const indexText = readFileSync( + resolve(targetFolder, `./sitemap-index.xml`), + 'utf-8' + ); + expect(indexText).toContain(`${baseURL}sitemap-0`); + expect(indexText).toContain(`${baseURL}sitemap-1`); + expect(indexText).not.toContain(`${baseURL}sitemap-2`); + }); + + it('does not hang if last sitemap is filled', async () => { + const baseURL = 'https://example.com/sub/'; + + const sms = new SitemapAndIndexStream({ + limit: 2, + getSitemapStream: (i: number): [string, SitemapStream, WriteStream] => { + const sm = new SitemapStream(); + const path = `./sitemap-${i}.xml`; + + // This will not throw even though it will fail + // `outputStream.writable === true` + // `outputStream.closed === false` + const outputStream = createWriteStream(resolve(targetFolder, path)); + + // Streams do not automatically propagate errors + // We must propagate this up to the SitemapStream + outputStream.on('error', (err) => { + sm.emit('error', err); + }); + + const ws = sm.pipe(outputStream); + return [new URL(path, baseURL).toString(), sm, ws]; + }, + }); + + // Pipe the index stream to a file + const indexStream = createWriteStream( + resolve(targetFolder, `./sitemap-index.xml`) + ); + sms.pipe(indexStream); + await writeData(sms, 'https://1.example.com/a'); + await writeData(sms, 'https://2.example.com/a'); + sms.end(); + await expect(finished(sms)).resolves.toBeUndefined(); + + await finished(indexStream); + + expect(existsSync(resolve(targetFolder, `./sitemap-index.xml`))).toBe(true); + expect(existsSync(resolve(targetFolder, `./sitemap-0.xml`))).toBe(true); + expect(existsSync(resolve(targetFolder, `./sitemap-1.xml`))).toBe(false); + + const sitemap0Raw = readFileSync( + resolve(targetFolder, `./sitemap-0.xml`), + 'utf-8' + ); + expect(sitemap0Raw).toContain('https://1.example.com/a'); + expect(sitemap0Raw).toContain('https://2.example.com/a'); + expect(sitemap0Raw).not.toContain('https://3.example.com/a'); + + // Read the first sitemap to make sure it was written + const sitemap0 = await streamToPromise( + createReadStream(resolve(targetFolder, `./sitemap-0.xml`)) + ); + expect(sitemap0.toString()).toContain('https://1.example.com/a'); + + // Read the index to make sure it was written + const indexText = readFileSync( + resolve(targetFolder, `./sitemap-index.xml`), + 'utf-8' + ); + expect(indexText).toContain(`${baseURL}sitemap-0`); + expect(indexText).not.toContain(`${baseURL}sitemap-1`); + }); + + it('deterministically finishes writing each sitemap file before creating a new one', async () => { + const baseURL = 'https://example.com/sub/'; + + const sms = new SitemapAndIndexStream({ + limit: 5000, + getSitemapStream: (i: number): [string, SitemapStream, WriteStream] => { + const sm = new SitemapStream(); + const path = `./sitemap-${i}.xml`; + + // This will not throw even though it will fail + // `outputStream.writable === true` + // `outputStream.closed === false` + const outputStream = createWriteStream(resolve(targetFolder, path)); + + // Streams do not automatically propagate errors + // We must propagate this up to the SitemapStream + outputStream.on('error', (err) => { + sm.emit('error', err); + }); + + const ws = sm.pipe(outputStream); + return [new URL(path, baseURL).toString(), sm, ws]; + }, + }); + + // Pipe the index stream to a file + const indexStream = createWriteStream( + resolve(targetFolder, `./sitemap-index.xml`) + ); + sms.pipe(indexStream); + for (let i = 0; i < 5000; i++) { + // Intentionally write while ignoring back pressure to stress test + // the rolling to new files + sms.write(`https://1.example.com/a${i}`); + } + for (let i = 0; i < 5000; i++) { + sms.write(`https://2.example.com/a${i}`); + } + for (let i = 0; i < 1; i++) { + sms.write(`https://3.example.com/a${i}`); + } + sms.end(); + await expect(finished(sms)).resolves.toBeUndefined(); + + await finished(indexStream); + + expect(existsSync(resolve(targetFolder, `./sitemap-index.xml`))).toBe(true); + expect(existsSync(resolve(targetFolder, `./sitemap-0.xml`))).toBe(true); + expect(existsSync(resolve(targetFolder, `./sitemap-1.xml`))).toBe(true); + expect(existsSync(resolve(targetFolder, `./sitemap-2.xml`))).toBe(true); + expect(existsSync(resolve(targetFolder, `./sitemap-3.xml`))).toBe(false); + + // Make sure the very first file is completed + const sitemap0Raw = readFileSync( + resolve(targetFolder, `./sitemap-0.xml`), + 'utf-8' + ); + expect(sitemap0Raw).toContain(''); + expect(sitemap0Raw).toContain('https://1.example.com/a0'); + expect(sitemap0Raw).toContain('https://1.example.com/a4999'); + expect(sitemap0Raw).toContain(''); + + // Make sure the first rolled file is completed + const sitemap1Raw = readFileSync( + resolve(targetFolder, `./sitemap-1.xml`), + 'utf-8' + ); + expect(sitemap1Raw).toContain(''); + expect(sitemap1Raw).toContain('https://2.example.com/a0'); + expect(sitemap1Raw).toContain('https://2.example.com/a4999'); + expect(sitemap1Raw).toContain(''); + + // Make sure the last file is completed + const sitemap2Raw = readFileSync( + resolve(targetFolder, `./sitemap-2.xml`), + 'utf-8' + ); + expect(sitemap2Raw).toContain(''); + expect(sitemap2Raw).toContain('https://3.example.com/a0'); + expect(sitemap2Raw).toContain(''); + expect(sitemap2Raw).not.toContain('https://3.example.com/a1'); + + // Read the index to make sure it was written + const indexText = readFileSync( + resolve(targetFolder, `./sitemap-index.xml`), + 'utf-8' + ); + expect(indexText).toContain(''); + expect(indexText).not.toContain(`${baseURL}sitemap-3`); + }); + + it('works if no items written at all', async () => { + const baseURL = 'https://example.com/sub/'; + + const sms = new SitemapAndIndexStream({ + limit: 2, + getSitemapStream: (i: number): [string, SitemapStream, WriteStream] => { + const sm = new SitemapStream(); + const path = `./sitemap-${i}.xml`; + + // This will not throw even though it will fail + // `outputStream.writable === true` + // `outputStream.closed === false` + const outputStream = createWriteStream(resolve(targetFolder, path)); + + // Streams do not automatically propagate errors + // We must propagate this up to the SitemapStream + outputStream.on('error', (err) => { + sm.emit('error', err); + }); + + const ws = sm.pipe(outputStream); + return [new URL(path, baseURL).toString(), sm, ws]; + }, + }); + + // Pipe the index stream to a file + const indexStream = createWriteStream( + resolve(targetFolder, `./sitemap-index.xml`) + ); + sms.pipe(indexStream); + sms.end(); + await expect(finished(sms)).resolves.toBeUndefined(); + + await finished(indexStream); + + expect(existsSync(resolve(targetFolder, `./sitemap-index.xml`))).toBe(true); + expect(existsSync(resolve(targetFolder, `./sitemap-0.xml`))).toBe(false); + + // Read the index to make sure it was written + const indexText = readFileSync( + resolve(targetFolder, `./sitemap-index.xml`), + 'utf-8' + ); + expect(indexText).toContain(``); + expect(indexText).not.toContain(`${baseURL}sitemap-2`); + }); }); + +function writeData(sms: SitemapStream, data: any): Promise { + if (!sms.write(data)) { + return new Promise((resolve) => { + sms.once('drain', resolve); + }); + } + return Promise.resolve(); +} From c412f375cd3672c67774b316f89b0a10e6c138cb Mon Sep 17 00:00:00 2001 From: Harold Hunt Date: Tue, 21 May 2024 20:46:48 -0400 Subject: [PATCH 5/9] Issue-423 - Update empty index test --- tests/sitemap-index.test.ts | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/tests/sitemap-index.test.ts b/tests/sitemap-index.test.ts index 8fd2284..6718779 100644 --- a/tests/sitemap-index.test.ts +++ b/tests/sitemap-index.test.ts @@ -373,9 +373,6 @@ describe('sitemapAndIndex', () => { const sm = new SitemapStream(); const path = `./sitemap-${i}.xml`; - // This will not throw even though it will fail - // `outputStream.writable === true` - // `outputStream.closed === false` const outputStream = createWriteStream(resolve(targetFolder, path)); // Streams do not automatically propagate errors @@ -459,7 +456,7 @@ describe('sitemapAndIndex', () => { expect(indexText).not.toContain(`${baseURL}sitemap-3`); }); - it('works if no items written at all', async () => { + it('writes index if no items written at all', async () => { const baseURL = 'https://example.com/sub/'; const sms = new SitemapAndIndexStream({ @@ -468,9 +465,6 @@ describe('sitemapAndIndex', () => { const sm = new SitemapStream(); const path = `./sitemap-${i}.xml`; - // This will not throw even though it will fail - // `outputStream.writable === true` - // `outputStream.closed === false` const outputStream = createWriteStream(resolve(targetFolder, path)); // Streams do not automatically propagate errors @@ -503,14 +497,17 @@ describe('sitemapAndIndex', () => { 'utf-8' ); expect(indexText).toContain(``); expect(indexText).not.toContain(`${baseURL}sitemap-2`); }); }); -function writeData(sms: SitemapStream, data: any): Promise { +function writeData( + sms: SitemapStream | SitemapAndIndexStream, + data: any +): Promise { if (!sms.write(data)) { return new Promise((resolve) => { sms.once('drain', resolve); From 6d9d10b26424e55713d30f239273d5743499142a Mon Sep 17 00:00:00 2001 From: Harold Hunt Date: Tue, 21 May 2024 20:48:24 -0400 Subject: [PATCH 6/9] Issue-423 - Fix failing tests --- lib/sitemap-index-stream.ts | 178 +++++++++++++++++++++++++----------- tests/sitemap-index.test.ts | 2 +- 2 files changed, 127 insertions(+), 53 deletions(-) diff --git a/lib/sitemap-index-stream.ts b/lib/sitemap-index-stream.ts index c75d966..7421bc7 100644 --- a/lib/sitemap-index-stream.ts +++ b/lib/sitemap-index-stream.ts @@ -1,8 +1,8 @@ +import { WriteStream } from 'fs'; import { Transform, TransformOptions, TransformCallback } from 'stream'; import { IndexItem, SitemapItemLoose, ErrorLevel } from './types'; import { SitemapStream, stylesheetInclude } from './sitemap-stream'; import { element, otag, ctag } from './sitemap-xml'; -import { WriteStream } from 'fs'; export enum IndexTagNames { sitemap = 'sitemap', @@ -36,18 +36,22 @@ export class SitemapIndexStream extends Transform { this.xslUrl = opts.xslUrl; } + private writeHeadOutput(): void { + this.hasHeadOutput = true; + let stylesheet = ''; + if (this.xslUrl) { + stylesheet = stylesheetInclude(this.xslUrl); + } + this.push(xmlDec + stylesheet + sitemapIndexTagStart); + } + _transform( item: IndexItem | string, encoding: string, callback: TransformCallback ): void { if (!this.hasHeadOutput) { - this.hasHeadOutput = true; - let stylesheet = ''; - if (this.xslUrl) { - stylesheet = stylesheetInclude(this.xslUrl); - } - this.push(xmlDec + stylesheet + sitemapIndexTagStart); + this.writeHeadOutput(); } this.push(otag(IndexTagNames.sitemap)); if (typeof item === 'string') { @@ -69,11 +73,29 @@ export class SitemapIndexStream extends Transform { } _flush(cb: TransformCallback): void { + if (!this.hasHeadOutput) { + this.writeHeadOutput(); + } + this.push(closetag); cb(); } } +/** + * Callback for SitemapIndexAndStream that creates a new sitemap stream for a given sitemap index. + * + * Called when a new sitemap file is needed. + * + * The write stream is the destination where the sitemap was piped. + * SitemapAndIndexStream will wait for the `finish` event on each sitemap's + * write stream before moving on to the next sitemap. This ensures that the + * contents of the write stream will be fully written before being used + * by any following operations (e.g. uploading, reading contents for unit tests). + * + * @param i - The index of the sitemap file + * @returns A tuple containing the index item to be written into the sitemap index, the sitemap stream, and the write stream for the sitemap pipe destination + */ type getSitemapStream = ( i: number ) => [IndexItem | string, SitemapStream, WriteStream]; @@ -84,70 +106,122 @@ export interface SitemapAndIndexStreamOptions limit?: number; getSitemapStream: getSitemapStream; } -// const defaultSIStreamOpts: SitemapAndIndexStreamOptions = {}; + export class SitemapAndIndexStream extends SitemapIndexStream { - private i: number; + private itemsWritten: number; private getSitemapStream: getSitemapStream; - private currentSitemap: SitemapStream; - private currentSitemapPipeline?: WriteStream; - private idxItem: IndexItem | string; + private currentSitemap?: SitemapStream; private limit: number; + private currentSitemapPipeline?: WriteStream; + constructor(opts: SitemapAndIndexStreamOptions) { opts.objectMode = true; super(opts); - this.i = 0; + this.itemsWritten = 0; this.getSitemapStream = opts.getSitemapStream; - [this.idxItem, this.currentSitemap, this.currentSitemapPipeline] = - this.getSitemapStream(0); - this.currentSitemap.on('error', (err) => this.emit('error', err)); this.limit = opts.limit ?? 45000; } - _writeSMI(item: SitemapItemLoose, callback: () => void): void { - this.i++; - if (!this.currentSitemap.write(item)) { - this.currentSitemap.once('drain', callback); - } else { - process.nextTick(callback); - } - } - _transform( item: SitemapItemLoose, encoding: string, callback: TransformCallback ): void { - if (this.i === 0) { - this._writeSMI(item, () => - super._transform(this.idxItem, encoding, callback) - ); - } else if (this.i % this.limit === 0) { - const onFinish = () => { - const [idxItem, currentSitemap, currentSitemapPipeline] = - this.getSitemapStream(this.i / this.limit); - currentSitemap.on('error', (err) => this.emit('error', err)); - this.currentSitemap = currentSitemap; - this.currentSitemapPipeline = currentSitemapPipeline; - // push to index stream - this._writeSMI(item, () => - // push to index stream - super._transform(idxItem, encoding, callback) - ); - }; - this.currentSitemapPipeline?.on('finish', onFinish); - this.currentSitemap.end( - !this.currentSitemapPipeline ? onFinish : undefined - ); + if (this.itemsWritten % this.limit === 0) { + if (this.currentSitemap) { + const onFinish = new Promise((resolve, reject) => { + this.currentSitemap?.on('finish', resolve); + this.currentSitemap?.on('error', reject); + this.currentSitemap?.end(); + }); + + const onPipelineFinish = this.currentSitemapPipeline + ? new Promise((resolve, reject) => { + this.currentSitemapPipeline?.on('finish', resolve); + this.currentSitemapPipeline?.on('error', reject); + }) + : Promise.resolve(); + + Promise.all([onFinish, onPipelineFinish]) + .then(() => { + this.createSitemap(encoding); + this.writeItem(item, callback); + }) + .catch(callback); + return; + } else { + this.createSitemap(encoding); + } + } + + this.writeItem(item, callback); + } + + private writeItem(item: SitemapItemLoose, callback: TransformCallback): void { + if (!this.currentSitemap) { + callback(new Error('No sitemap stream available')); + return; + } + + if (!this.currentSitemap.write(item)) { + this.currentSitemap.once('drain', callback); } else { - this._writeSMI(item, callback); + process.nextTick(callback); } + + // Increment the count of items written + this.itemsWritten++; } + /** + * Called when the stream is finished. + * If there is a current sitemap, we wait for it to finish before calling the callback. + * + * @param cb + */ _flush(cb: TransformCallback): void { - const onFinish = () => super._flush(cb); - this.currentSitemapPipeline?.on('finish', onFinish); - this.currentSitemap.end( - !this.currentSitemapPipeline ? onFinish : undefined - ); + const onFinish = new Promise((resolve, reject) => { + if (this.currentSitemap) { + this.currentSitemap.on('finish', resolve); + this.currentSitemap.on('error', reject); + this.currentSitemap.end(); + } else { + resolve(); + } + }); + + const onPipelineFinish = new Promise((resolve, reject) => { + if (this.currentSitemapPipeline) { + this.currentSitemapPipeline.on('finish', resolve); + this.currentSitemapPipeline.on('error', reject); + // The pipeline (pipe target) will get it's end() call + // from the sitemap stream ending. + } else { + resolve(); + } + }); + + Promise.all([onFinish, onPipelineFinish]) + .then(() => { + super._flush(cb); + }) + .catch((err) => { + cb(err); + }); + } + + private createSitemap(encoding: string): void { + const [idxItem, currentSitemap, currentSitemapPipeline] = + this.getSitemapStream(this.itemsWritten / this.limit); + currentSitemap.on('error', (err: any) => this.emit('error', err)); + this.currentSitemap = currentSitemap; + this.currentSitemapPipeline = currentSitemapPipeline; + super._transform(idxItem, encoding, () => { + // We are not too fussed about waiting for the index item to be written + // we we'll wait for the file to finish at the end + // and index file write volume tends to be small in comprarison to sitemap + // writes. + // noop + }); } } diff --git a/tests/sitemap-index.test.ts b/tests/sitemap-index.test.ts index 6718779..c4b78eb 100644 --- a/tests/sitemap-index.test.ts +++ b/tests/sitemap-index.test.ts @@ -506,7 +506,7 @@ describe('sitemapAndIndex', () => { function writeData( sms: SitemapStream | SitemapAndIndexStream, - data: any + data ): Promise { if (!sms.write(data)) { return new Promise((resolve) => { From 655b30b71f9614e326c5b21f7c95e1b5d516c878 Mon Sep 17 00:00:00 2001 From: Harold Hunt Date: Tue, 21 May 2024 22:17:27 -0400 Subject: [PATCH 7/9] Issue-423 - jsdocs for SitemapIndex classes --- lib/sitemap-index-stream.ts | 122 ++++++++++++++++++++++++++++++------ tests/sitemap-index.test.ts | 2 +- 2 files changed, 104 insertions(+), 20 deletions(-) diff --git a/lib/sitemap-index-stream.ts b/lib/sitemap-index-stream.ts index 7421bc7..af9c823 100644 --- a/lib/sitemap-index-stream.ts +++ b/lib/sitemap-index-stream.ts @@ -16,17 +16,57 @@ const sitemapIndexTagStart = ''; const closetag = ''; +/** + * Options for the SitemapIndexStream + */ export interface SitemapIndexStreamOptions extends TransformOptions { + /** + * Whether to output the lastmod date only (no time) + * + * @default false + */ lastmodDateOnly?: boolean; + + /** + * How to handle errors in passed in urls + * + * @default ErrorLevel.WARN + */ level?: ErrorLevel; + + /** + * URL to an XSL stylesheet to include in the XML + */ xslUrl?: string; } const defaultStreamOpts: SitemapIndexStreamOptions = {}; + +/** + * `SitemapIndexStream` is a Transform stream that takes `IndexItem`s or sitemap URL strings and outputs a stream of sitemap index XML. + * + * It automatically handles the XML declaration and the opening and closing tags for the sitemap index. + * + * ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null) + * before `finish` will be emitted. Failure to read the stream will result in hangs. + * + * @extends {Transform} + */ export class SitemapIndexStream extends Transform { lastmodDateOnly: boolean; level: ErrorLevel; xslUrl?: string; private hasHeadOutput: boolean; + + /** + * `SitemapIndexStream` is a Transform stream that takes `IndexItem`s or sitemap URL strings and outputs a stream of sitemap index XML. + * + * It automatically handles the XML declaration and the opening and closing tags for the sitemap index. + * + * ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null) + * before `finish` will be emitted. Failure to read the stream will result in hangs. + * + * @param {SitemapIndexStreamOptions} [opts=defaultStreamOpts] - Stream options. + */ constructor(opts = defaultStreamOpts) { opts.objectMode = true; super(opts); @@ -82,38 +122,82 @@ export class SitemapIndexStream extends Transform { } } -/** - * Callback for SitemapIndexAndStream that creates a new sitemap stream for a given sitemap index. - * - * Called when a new sitemap file is needed. - * - * The write stream is the destination where the sitemap was piped. - * SitemapAndIndexStream will wait for the `finish` event on each sitemap's - * write stream before moving on to the next sitemap. This ensures that the - * contents of the write stream will be fully written before being used - * by any following operations (e.g. uploading, reading contents for unit tests). - * - * @param i - The index of the sitemap file - * @returns A tuple containing the index item to be written into the sitemap index, the sitemap stream, and the write stream for the sitemap pipe destination - */ -type getSitemapStream = ( +type getSitemapStreamFunc = ( i: number ) => [IndexItem | string, SitemapStream, WriteStream]; +/** + * Options for the SitemapAndIndexStream + * + * @extends {SitemapIndexStreamOptions} + */ export interface SitemapAndIndexStreamOptions extends SitemapIndexStreamOptions { - level?: ErrorLevel; + /** + * Max number of items in each sitemap XML file. + * + * When the limit is reached the current sitemap file will be closed, + * a wait for `finish` on the target write stream will happen, + * and a new sitemap file will be created. + * + * Range: 1 - 50,000 + * + * @default 45000 + */ limit?: number; - getSitemapStream: getSitemapStream; + + /** + * Callback for SitemapIndexAndStream that creates a new sitemap stream for a given sitemap index. + * + * Called when a new sitemap file is needed. + * + * The write stream is the destination where the sitemap was piped. + * SitemapAndIndexStream will wait for the `finish` event on each sitemap's + * write stream before moving on to the next sitemap. This ensures that the + * contents of the write stream will be fully written before being used + * by any following operations (e.g. uploading, reading contents for unit tests). + * + * @param i - The index of the sitemap file + * @returns A tuple containing the index item to be written into the sitemap index, the sitemap stream, and the write stream for the sitemap pipe destination + */ + getSitemapStream: getSitemapStreamFunc; } +/** + * `SitemapAndIndexStream` is a Transform stream that takes in sitemap items, + * writes them to sitemap files, adds the sitemap files to a sitemap index, + * and creates new sitemap files when the count limit is reached. + * + * It waits for the target stream of the current sitemap file to finish before + * moving on to the next if the target stream is returned by the `getSitemapStream` + * callback in the 3rd position of the tuple. + * + * ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null) + * before `finish` will be emitted. Failure to read the stream will result in hangs. + * + * @extends {SitemapIndexStream} + */ export class SitemapAndIndexStream extends SitemapIndexStream { private itemsWritten: number; - private getSitemapStream: getSitemapStream; + private getSitemapStream: getSitemapStreamFunc; private currentSitemap?: SitemapStream; private limit: number; private currentSitemapPipeline?: WriteStream; + /** + * `SitemapAndIndexStream` is a Transform stream that takes in sitemap items, + * writes them to sitemap files, adds the sitemap files to a sitemap index, + * and creates new sitemap files when the count limit is reached. + * + * It waits for the target stream of the current sitemap file to finish before + * moving on to the next if the target stream is returned by the `getSitemapStream` + * callback in the 3rd position of the tuple. + * + * ⚠️ CAUTION: This object is `readable` and must be read (e.g. piped to a file or to /dev/null) + * before `finish` will be emitted. Failure to read the stream will result in hangs. + * + * @param {SitemapAndIndexStreamOptions} opts - Stream options. + */ constructor(opts: SitemapAndIndexStreamOptions) { opts.objectMode = true; super(opts); @@ -213,7 +297,7 @@ export class SitemapAndIndexStream extends SitemapIndexStream { private createSitemap(encoding: string): void { const [idxItem, currentSitemap, currentSitemapPipeline] = this.getSitemapStream(this.itemsWritten / this.limit); - currentSitemap.on('error', (err: any) => this.emit('error', err)); + currentSitemap.on('error', (err) => this.emit('error', err)); this.currentSitemap = currentSitemap; this.currentSitemapPipeline = currentSitemapPipeline; super._transform(idxItem, encoding, () => { diff --git a/tests/sitemap-index.test.ts b/tests/sitemap-index.test.ts index c4b78eb..3dc79e8 100644 --- a/tests/sitemap-index.test.ts +++ b/tests/sitemap-index.test.ts @@ -13,7 +13,7 @@ import { } from '../lib/sitemap-index-stream'; import { streamToPromise } from '../lib/sitemap-stream'; import { finished as finishedCallback } from 'stream'; -import { readFileSync, WriteStream } from 'node:fs'; +import { readFileSync, WriteStream } from 'fs'; import { promisify } from 'util'; const finished = promisify(finishedCallback); From 94f7ba644b200c0b96e50c0e34cb613f05602f7f Mon Sep 17 00:00:00 2001 From: derduher Date: Tue, 21 May 2024 20:41:26 -0700 Subject: [PATCH 8/9] add workspacefile to npmignore --- .npmignore | 1 + 1 file changed, 1 insertion(+) diff --git a/.npmignore b/.npmignore index 747cc3d..3f9b062 100644 --- a/.npmignore +++ b/.npmignore @@ -69,3 +69,4 @@ stream-write.js toflat.js *.map examples/ +sitemap.code-workspace From 42d32e1d4d963fa0d4f6dddcc814cbc01f800bae Mon Sep 17 00:00:00 2001 From: derduher Date: Tue, 21 May 2024 20:48:02 -0700 Subject: [PATCH 9/9] drop node-12 --- .github/workflows/nodejs.yml | 2 +- .travis.yml | 14 -------------- CHANGELOG.md | 4 ++++ package-lock.json | 4 ++-- package.json | 4 ++-- 5 files changed, 9 insertions(+), 19 deletions(-) delete mode 100644 .travis.yml diff --git a/.github/workflows/nodejs.yml b/.github/workflows/nodejs.yml index c3098df..7f52be2 100644 --- a/.github/workflows/nodejs.yml +++ b/.github/workflows/nodejs.yml @@ -9,7 +9,7 @@ jobs: strategy: matrix: - node-version: [12.x, 14.x, 16.x] + node-version: [14.x, 16.x] steps: - uses: actions/checkout@v1 diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index d7b117e..0000000 --- a/.travis.yml +++ /dev/null @@ -1,14 +0,0 @@ -language: node_js -node_js: - - "12" - - "14" - - "16" -install: - - npm ci -script: - - travis_retry npm test -addons: - apt: - packages: - # Needed for `xmllint`. - - libxml2-utils diff --git a/CHANGELOG.md b/CHANGELOG.md index b63c5f2..e69df8f 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,9 @@ # Changelog +## 8.0.0 + +- drop node 12 support + ## 7.1.2 - fix #425 via #426 thanks to @huntharo update streamToPromise to bubble up errors + jsDoc diff --git a/package-lock.json b/package-lock.json index 46d1b04..a9185e8 100644 --- a/package-lock.json +++ b/package-lock.json @@ -49,8 +49,8 @@ "typescript": "^4.2.4" }, "engines": { - "node": ">=12.0.0", - "npm": ">=5.6.0" + "node": ">=14.0.0", + "npm": ">=6.0.0" } }, "node_modules/@babel/code-frame": { diff --git a/package.json b/package.json index ef62188..910e5c2 100644 --- a/package.json +++ b/package.json @@ -184,7 +184,7 @@ "typescript": "^4.2.4" }, "engines": { - "node": ">=12.0.0", - "npm": ">=5.6.0" + "node": ">=14.0.0", + "npm": ">=6.0.0" } } \ No newline at end of file