From db6f53928650a04b340ecdc01db2d49937e5d63c Mon Sep 17 00:00:00 2001 From: isaacs Date: Mon, 4 Sep 2023 22:18:19 -0700 Subject: [PATCH] file inference improvements for .tbr and .tgz When unpacking, only infer brotli compression from the filename if the first 512 bytes are an invalid tar header (or the stream is less than 512 bytes). While Brotli doesn't give us magic header bytes like gzip, we can be reasonably sure that a .tbr file starting with 512 bytes of valid tar data is almost certainly not a brotli-compressed archive. And a .tbr file starting with the magic gzip bytes is almost certainly a gzip archive, and not brotli, despite what the filename says. In all cases, if explicit boolean or object values appear in the options for either gzip or brotli, we respect that, and ignore the filename. --- lib/pack.js | 1 + lib/parse.js | 45 ++++++++++++++++++++++++++++++++++++++++--- test/parse.js | 53 +++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 94 insertions(+), 5 deletions(-) diff --git a/lib/pack.js b/lib/pack.js index 789a2a71..d533a068 100644 --- a/lib/pack.js +++ b/lib/pack.js @@ -79,6 +79,7 @@ const Pack = warner(class Pack extends Minipass { this.portable = !!opt.portable this.zip = null + if (opt.gzip || opt.brotli) { if (opt.gzip && opt.brotli) { throw new TypeError('gzip and brotli are mutually exclusive') diff --git a/lib/parse.js b/lib/parse.js index 6906d059..94e53042 100644 --- a/lib/parse.js +++ b/lib/parse.js @@ -100,7 +100,13 @@ module.exports = warner(class Parser extends EE { // Unlike gzip, brotli doesn't have any magic bytes to identify it // Users need to explicitly tell us they're extracting a brotli file // Or we infer from the file extension - this.brotli = opt.brotli || (opt.file && (opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr'))) + const isTBR = (opt.file && ( + opt.file.endsWith('.tar.br') || opt.file.endsWith('.tbr'))) + // if it's a tbr file it MIGHT be brotli, but we don't know until + // we look at 
it and verify it's not a valid tar file. + this.brotli = !opt.gzip && opt.brotli !== undefined ? opt.brotli + : isTBR ? undefined + : false // have to set this so that streams are ok piping into it this.writable = true @@ -351,7 +357,9 @@ module.exports = warner(class Parser extends EE { } // first write, might be gzipped - if (this[UNZIP] === null && chunk) { + const needSniff = this[UNZIP] === null || + this.brotli === undefined && this[UNZIP] === false + if (needSniff && chunk) { if (this[BUFFER]) { chunk = Buffer.concat([this[BUFFER], chunk]) this[BUFFER] = null @@ -360,15 +368,45 @@ module.exports = warner(class Parser extends EE { this[BUFFER] = chunk return true } + + // look for gzip header for (let i = 0; this[UNZIP] === null && i < gzipHeader.length; i++) { if (chunk[i] !== gzipHeader[i]) { this[UNZIP] = false } } + + const maybeBrotli = this.brotli === undefined + if (this[UNZIP] === false && maybeBrotli) { + // read the first header to see if it's a valid tar file. If so, + // we can safely assume that it's not actually brotli, despite the + // .tbr or .tar.br file extension. + // if we ended before getting a full chunk, yes, def brotli + if (chunk.length < 512) { + if (this[ENDED]) { + this.brotli = true + } else { + this[BUFFER] = chunk + return true + } + } else { + // if it's tar, it's pretty reliably not brotli, chances of + // that happening are astronomical. + try { + new Header(chunk.slice(0, 512)) + this.brotli = false + } catch (_) { + this.brotli = true + } + } + } + if (this[UNZIP] === null || (this[UNZIP] === false && this.brotli)) { const ended = this[ENDED] this[ENDED] = false - this[UNZIP] = this.brotli ? new zlib.BrotliDecompress() : new zlib.Unzip() + this[UNZIP] = this[UNZIP] === null + ? 
new zlib.Unzip() + : new zlib.BrotliDecompress() this[UNZIP].on('data', chunk => this[CONSUMECHUNK](chunk)) this[UNZIP].on('error', er => this.abort(er)) this[UNZIP].on('end', _ => { @@ -506,6 +544,7 @@ module.exports = warner(class Parser extends EE { this[UNZIP].end(chunk) } else { this[ENDED] = true + if (this.brotli === undefined) chunk = chunk || Buffer.alloc(0) this.write(chunk) } } diff --git a/test/parse.js b/test/parse.js index dff01f3c..549b3701 100644 --- a/test/parse.js +++ b/test/parse.js @@ -80,7 +80,7 @@ t.test('fixture tests', t => { const eventsFile = parsedir + '/' + base + tail const expect = require(eventsFile) - t.test('one byte at a time', t => { + t.test('uncompressed one byte at a time', t => { const bs = new ByteStream() const opt = (maxMeta || filter || strict) ? { maxMetaEntrySize: maxMeta, @@ -93,7 +93,7 @@ t.test('fixture tests', t => { bs.end(tardata) }) - t.test('all at once', t => { + t.test('uncompressed all at once', t => { const p = new Parse({ maxMetaEntrySize: maxMeta, filter: filter ? (path, entry) => entry.size % 2 !== 0 : null, @@ -103,6 +103,31 @@ t.test('fixture tests', t => { p.end(tardata) }) + t.test('uncompressed one byte at a time, filename .tbr', t => { + const bs = new ByteStream() + const opt = (maxMeta || filter || strict) ? { + maxMetaEntrySize: maxMeta, + filter: filter ? (path, entry) => entry.size % 2 !== 0 : null, + strict: strict, + file: 'example.tbr', + } : null + const bp = new Parse(opt) + trackEvents(t, expect, bp) + bs.pipe(bp) + bs.end(tardata) + }) + + t.test('uncompressed all at once, filename .tar.br', t => { + const p = new Parse({ + maxMetaEntrySize: maxMeta, + filter: filter ? 
(path, entry) => entry.size % 2 !== 0 : null, + strict: strict, + file: 'example.tar.br', + }) + trackEvents(t, expect, p) + p.end(tardata) + }) + t.test('gzipped all at once', t => { const p = new Parse({ maxMetaEntrySize: maxMeta, @@ -113,6 +138,17 @@ t.test('fixture tests', t => { p.end(zlib.gzipSync(tardata)) }) + t.test('gzipped all at once, filename .tbr', t => { + const p = new Parse({ + maxMetaEntrySize: maxMeta, + filter: filter ? (path, entry) => entry.size % 2 !== 0 : null, + strict: strict, + file: 'example.tbr', + }) + trackEvents(t, expect, p) + p.end(zlib.gzipSync(tardata)) + }) + t.test('gzipped byte at a time', t => { const bs = new ByteStream() const bp = new Parse({ @@ -171,6 +207,19 @@ t.test('fixture tests', t => { bs.end(zlib.brotliCompressSync(tardata)) }) + t.test('compress with brotli .tbr byte at a time', t => { + const bs = new ByteStream() + const bp = new Parse({ + maxMetaEntrySize: maxMeta, + filter: filter ? (path, entry) => entry.size % 2 !== 0 : null, + strict: strict, + file: 'example.tbr', + }) + trackEvents(t, expect, bp) + bs.pipe(bp) + bs.end(zlib.brotliCompressSync(tardata)) + }) + t.test('async chunks', t => { const p = new Parse({ maxMetaEntrySize: maxMeta,