Skip to content

Commit

Permalink
For issue #48, add Gunzipper that relies on DecompressionStream('gzip').
Browse files Browse the repository at this point in the history
  • Loading branch information
codedread committed Feb 5, 2024
1 parent d01610a commit 813b154
Show file tree
Hide file tree
Showing 4 changed files with 179 additions and 1 deletion.
27 changes: 27 additions & 0 deletions archive/decompress.js
Original file line number Diff line number Diff line change
Expand Up @@ -319,6 +319,31 @@ export class Untarrer extends Unarchiver {
getScriptFileName() { return './untar.js'; };
}

/**
 * Unarchiver for gzip data. Reports the 'application/gzip' MIME type and names './gunzip.js' as its
 * implementation script.
 *
 * IMPORTANT NOTES for Gunzipper:
 * 1) A Gunzipper will only ever emit one EXTRACT event, because a gzipped file only ever contains
 *    a single file.
 * 2) If the gzipped file does not include the original filename as a FNAME block, then the
 *    UnarchivedFile in the UnarchiveExtractEvent will not include a filename. It will be up to the
 *    client to re-assemble the filename (if needed).
 * 3) update() is not supported on a Gunzipper, since the current implementation relies on runtime
 *    support for DecompressionStream('gzip'), which can throw hard-to-detect errors when reading
 *    only part of a file.
 * 4) PROGRESS events are not yet supported in Gunzipper.
 */
export class Gunzipper extends Unarchiver {
  /**
   * Both arguments are forwarded unchanged to the Unarchiver constructor.
   * @param {ArrayBuffer} ab
   * @param {UnarchiverOptions} options
   */
  constructor(ab, options = {}) {
    super(ab, options);
  }

  /** @returns {string} The MIME type for gzip. */
  getMIMEType() {
    return 'application/gzip';
  }

  /** @returns {string} The script file name of the gunzip implementation. */
  getScriptFileName() {
    return './gunzip.js';
  }
}

// TODO(2.0): When up-revving to a major new version, remove the string type for options.

/**
Expand All @@ -344,6 +369,8 @@ export function getUnarchiver(ab, options = {}) {
unarchiver = new Unrarrer(ab, options);
} else if (mimeType === 'application/zip') { // PK (Zip)
unarchiver = new Unzipper(ab, options);
} else if (mimeType === 'application/gzip') { // GZIP
unarchiver = new Gunzipper(ab, options);
} else { // Try with tar
unarchiver = new Untarrer(ab, options);
}
Expand Down
125 changes: 125 additions & 0 deletions archive/gunzip.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,125 @@
/**
* gunzip.js
*
* Licensed under the MIT License
*
* Copyright(c) 2024 Google Inc.
*
* Reference Documentation:
*
* https://www.ietf.org/rfc/rfc1952.txt
*/

import { BitStream } from '../io/bitstream.js';
import { ByteStream } from '../io/bytestream.js';

/**
 * The port used to post messages back to the host. Assigned by connect().
 * @type {MessagePort}
 */
let hostPort;

/**
 * The stream over the gzipped bytes. Stays null until the first message arrives.
 * @type {ByteStream}
 */
let bstream = null;

// Remains undefined unless the gzip header carries a FNAME block.
let filename;

/**
 * Posts an 'error' message carrying the given text to the host.
 * @param {string} str The error text.
 */
const err = (str) => {
  const errorMessage = { type: 'error', msg: str };
  return hostPort.postMessage(errorMessage);
};

/**
 * Counts the bytes that precede the next zero byte of the given stream. The scan runs on a tee()
 * of the stream so that the stream itself is not advanced.
 * @param {ByteStream} stream
 * @returns {number} The number of bytes before the null terminator.
 */
function countBytesBeforeNull(stream) {
  const probe = stream.tee();
  let numBytes = 0;
  while (probe.readNumber(1) !== 0) numBytes++;
  return numBytes;
}

/**
 * Parses the gzip header held in bstream, then decompresses the whole buffer with the runtime's
 * DecompressionStream('gzip').
 *
 * On success, posts one 'extract' message (with the filename from the FNAME block, if any, and the
 * decompressed bytes) followed by a 'finish' message. On a bad signature, an unsupported
 * compression method, or a decompression failure, posts an 'error' message via err() and returns
 * without posting 'extract' or 'finish'.
 * @returns {Promise<void>}
 */
async function gunzip() {
  const sig = bstream.readBytes(2);
  if (sig[0] !== 0x1F || sig[1] !== 0x8B) {
    const errMsg = `First two bytes not 0x1F, 0x8B: ${sig[0].toString(16)} ${sig[1].toString(16)}`;
    err(errMsg);
    return;
  }
  const compressionMethod = bstream.readNumber(1);
  if (compressionMethod !== 8) {
    const errMsg = `Compression method ${compressionMethod} not supported`;
    err(errMsg);
    return;
  }

  // Parse the GZIP header to see if we can find a filename (FNAME block).
  // The flag bits are read in the order FTEXT, FHCRC, FEXTRA, FNAME, FCOMMENT.
  const flags = new BitStream(bstream.readBytes(1).buffer);
  flags.skip(1); // skip FTEXT bit
  const fhcrc = flags.readBits(1);
  const fextra = flags.readBits(1);
  const fname = flags.readBits(1);
  const fcomment = flags.readBits(1);

  bstream.skip(4); // MTIME
  bstream.skip(1); // XFL
  bstream.skip(1); // OS

  if (fextra) {
    const xlen = bstream.readNumber(2);
    bstream.skip(xlen);
  }

  if (fname) {
    filename = bstream.readString(countBytesBeforeNull(bstream));
    // Consume the null terminator too, otherwise every later header field is read one byte early.
    bstream.skip(1);
  }

  if (fcomment) {
    // COMMENT text plus its null terminator.
    bstream.skip(countBytesBeforeNull(bstream) + 1);
  }

  if (fhcrc) {
    bstream.readNumber(2); // CRC16
  }

  // Now try to use native implementation of INFLATE, if supported by the runtime.
  // The native stream is handed the complete buffer (header included), not the remaining bytes.
  let fileData;
  try {
    const blob = new Blob([bstream.bytes.buffer]);
    const decompressedStream = blob.stream().pipeThrough(new DecompressionStream('gzip'));
    fileData = new Uint8Array(await new Response(decompressedStream).arrayBuffer());
  } catch (e) {
    // Report the failure to the host instead of leaving an unhandled promise rejection.
    err(`Native gzip decompression failed: ${e && e.message ? e.message : e}`);
    return;
  }

  const unarchivedFile = { filename, fileData };
  hostPort.postMessage({ type: 'extract', unarchivedFile }, [fileData.buffer]);

  // TODO: Support chunked decompression?
  // TODO: Fall through to non-native implementation via inflate() ?

  hostPort.postMessage({ type: 'finish', metadata: {} });
}

/**
 * Handles a message from the host. event.data.file has the first (and only) ArrayBuffer: the first
 * message creates the little-endian byte stream over it and runs gunzip().
 * @param {MessageEvent} event
 * @returns {Promise<void>}
 * @throws {string} If a byte stream already exists, i.e. more bytes were sent after the first
 *     message. Gunzipper needs the whole file up front.
 */
const onmessage = async function (event) {
  const bytes = event.data.file;

  if (bstream) {
    throw 'Gunzipper does not support calling update() with more bytes. Send the whole file with start().';
  }

  bstream = new ByteStream(bytes);
  bstream.setLittleEndian(true);

  await gunzip();
};

/**
 * Attaches the gunzip implementation to the host through the given MessagePort. The port is
 * remembered as the host port and its onmessage is set to this module's message handler.
 * @param {MessagePort} port
 * @throws {string} If a host port is already connected.
 */
export function connect(port) {
  const alreadyConnected = Boolean(hostPort);
  if (alreadyConnected) {
    throw 'connect(): hostPort already connected in gunzip.js';
  }

  hostPort = port;
  port.onmessage = onmessage;
}

/**
 * Detaches from the host and clears the module state: the host port, the byte stream and the
 * remembered filename.
 * @throws {string} If no host port is connected.
 */
export function disconnect() {
  if (hostPort) {
    filename = undefined;
    bstream = null;
    hostPort = null;
    return;
  }

  throw 'disconnect(): hostPort was not connected in gunzip.js';
}
28 changes: 27 additions & 1 deletion tests/archive-decompress.spec.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import * as fs from 'node:fs';
import 'mocha';
import { expect } from 'chai';

import { Unarchiver, Unrarrer, Untarrer, Unzipper, getUnarchiver } from '../archive/decompress.js';
import { Gunzipper, Unarchiver, getUnarchiver } from '../archive/decompress.js';

const PATH = `tests/archive-testfiles/`;

Expand Down Expand Up @@ -69,4 +69,30 @@ describe('bitjs.archive.decompress', () => {
expect(extractEvtFiredForOnExtract).equals(true);
});
}

// Verifies that getUnarchiver() picks a Gunzipper for gzip input and that the single extracted file
// carries the expected filename and bytes.
describe('gunzip', () => {
  it('can unzip a file', async () => {
    const bufs = new Map(inputArrayBuffers);
    const nodeBuf = fs.readFileSync(`${PATH}sample-1-slowest.txt.gz`);
    // Copy out exactly this file's bytes: a Node Buffer may be a view into a larger ArrayBuffer.
    const ab = nodeBuf.buffer.slice(nodeBuf.byteOffset, nodeBuf.byteOffset + nodeBuf.length);
    const gunzipper = getUnarchiver(ab, {debug: true});
    expect(gunzipper instanceof Gunzipper).equals(true);
    let extractEvtFiredForOnExtract = false;

    gunzipper.onExtract(evt => {
      extractEvtFiredForOnExtract = true;
      const {filename, fileData} = evt.unarchivedFile;
      expect(filename).equals('sample-1.txt');

      // NOTE(review): assumes the fixture entry is an ArrayBuffer (or typed array); wrapping it in
      // a Uint8Array makes the bytes indexable either way.
      const expectedBytes = new Uint8Array(bufs.get('sample-1.txt'));
      expect(fileData.byteLength).equals(expectedBytes.byteLength);
      for (let b = 0; b < fileData.byteLength; ++b) {
        expect(fileData[b]).equals(expectedBytes[b]);
      }
    });

    await gunzipper.start();
    expect(extractEvtFiredForOnExtract).equals(true);
  });
});
});
Binary file added tests/archive-testfiles/sample-1-slowest.txt.gz
Binary file not shown.

0 comments on commit 813b154

Please sign in to comment.