From 84b042512e49f6360ae81fa777caca219a2787ec Mon Sep 17 00:00:00 2001
From: Dan Ordille
Date: Thu, 26 Jul 2018 12:33:40 -0400
Subject: [PATCH] feat(core): add option to specify chunking algorithm

This allows the chunking algorithm and its options to be specified when
adding files. The chunker string is specified the same way as in go-ipfs
and supports the following formats: default, size-{size}, rabin,
rabin-{avg}, rabin-{min}-{avg}-{max}

This is required to achieve parity with go-ipfs.

Fixes #1283

License: MIT
Signed-off-by: Dan Ordille
---
 src/cli/commands/files/add.js   |   7 ++-
 src/core/components/files.js    |   6 ++-
 src/core/utils.js               | 118 +++++++++++++++++++++++++++++++++++++++++
 src/http/api/resources/files.js |   3 +-
 test/core/utils.js              |  67 +++++++++++++++++++++++
 5 files changed, 197 insertions(+), 4 deletions(-)

diff --git a/src/cli/commands/files/add.js b/src/cli/commands/files/add.js
index 2547150e91..141686ad1c 100644
--- a/src/cli/commands/files/add.js
+++ b/src/cli/commands/files/add.js
@@ -135,6 +135,10 @@ module.exports = {
       default: false,
       describe: 'Only chunk and hash, do not write'
     },
+    chunker: {
+      default: 'default',
+      describe: 'Chunking algorithm to use, formatted like [default, size-{size}, rabin, rabin-{avg}, rabin-{min}-{avg}-{max}]'
+    },
     'enable-sharding-experiment': {
       type: 'boolean',
       default: false
@@ -195,7 +199,8 @@ module.exports = {
       onlyHash: argv.onlyHash,
       hashAlg: argv.hash,
       wrapWithDirectory: argv.wrapWithDirectory,
-      pin: argv.pin
+      pin: argv.pin,
+      chunker: argv.chunker
     }
 
     if (options.enableShardingExperiment && utils.isDaemonOn()) {
diff --git a/src/core/components/files.js b/src/core/components/files.js
index de0b01c562..8bf66eb972 100644
--- a/src/core/components/files.js
+++ b/src/core/components/files.js
@@ -18,6 +18,7 @@ const OtherBuffer = require('buffer').Buffer
 const CID = require('cids')
 const toB58String = require('multihashes').toB58String
 const errCode = require('err-code')
+const parseChunkerString = require('../utils').parseChunkerString
 
 const WRAPPER = 'wrapper/'
 
@@ -148,12 +149,13 @@ class AddHelper extends Duplex {
 }
 
 module.exports = function files (self) {
-  function _addPullStream (options) {
+  function _addPullStream (options = {}) {
+    const chunkerOptions = parseChunkerString(options.chunker)
     const opts = Object.assign({}, {
       shardSplitThreshold: self._options.EXPERIMENTAL.sharding
         ? 1000
         : Infinity
-    }, options)
+    }, options, chunkerOptions)
 
     if (opts.hashAlg && opts.cidVersion !== 1) {
       opts.cidVersion = 1
diff --git a/src/core/utils.js b/src/core/utils.js
index 55ac9be2a2..6c9488e293 100644
--- a/src/core/utils.js
+++ b/src/core/utils.js
@@ -110,5 +110,123 @@ const resolvePath = promisify(function (objectAPI, ipfsPaths, callback) {
   }, callback)
 })
 
+/**
+ * Parses chunker string into options used by DAGBuilder in ipfs-unixfs-engine
+ *
+ * @param {String} chunker Chunker algorithm supported formats:
+ *                    "default" ("")
+ *                    "size-{size}"
+ *                    "rabin"
+ *                    "rabin-{avg}"
+ *                    "rabin-{min}-{avg}-{max}"
+ *
+ * @return {Object} Chunker options for DAGBuilder
+ * @throws {Error} If the format is not recognized or a size is not an integer
+ */
+function parseChunkerString (chunker) {
+  if (!chunker || chunker === 'default') {
+    return {
+      chunker: 'fixed'
+    }
+  }
+
+  // Guard before calling string methods: callers may pass a non-string value
+  if (typeof chunker !== 'string') {
+    throw new Error(`unrecognized chunker option: ${chunker}`)
+  }
+
+  if (chunker.startsWith('size-')) {
+    return {
+      chunker: 'fixed',
+      chunkerOptions: {
+        // Everything after the prefix is the size, so trailing parts such
+        // as "size-512-1024" are rejected instead of silently ignored
+        maxChunkSize: parseChunkSize(chunker.slice('size-'.length), 'size')
+      }
+    }
+  }
+
+  // Require an exact "rabin" or a "rabin-" prefix so "rabinfoo" is rejected
+  if (chunker === 'rabin' || chunker.startsWith('rabin-')) {
+    return {
+      chunker: 'rabin',
+      chunkerOptions: parseRabinString(chunker)
+    }
+  }
+
+  throw new Error(`unrecognized chunker option: ${chunker}`)
+}
+
+/**
+ * Parses rabin chunker string
+ *
+ * @param {String} chunker Chunker algorithm supported formats:
+ *            "rabin"
+ *            "rabin-{avg}"
+ *            "rabin-{min}-{avg}-{max}"
+ *
+ * @return {Object} rabin chunker options
+ * @throws {Error} If the format is not recognized or a size is not an integer
+ */
+function parseRabinString (chunker) {
+  const options = {}
+  const parts = chunker.split('-')
+  switch (parts.length) {
+    case 1:
+      // No size given: fall back to an average chunk size of 256 KiB
+      options.avgChunkSize = 262144
+      break
+    case 2:
+      options.avgChunkSize = parseChunkSize(parts[1], 'avg')
+      break
+    case 4:
+      options.minChunkSize = parseSub(parts[1].split(':'), 'min')
+      options.avgChunkSize = parseSub(parts[2].split(':'), 'avg')
+      options.maxChunkSize = parseSub(parts[3].split(':'), 'max')
+      break
+    default:
+      throw new Error('incorrect format (expected "rabin" "rabin-[avg]" or "rabin-[min]-[avg]-[max]")')
+  }
+
+  return options
+}
+
+/**
+ * Parses one rabin size part, optionally labelled as "{name}:{size}"
+ *
+ * @param {Array<String>} sub Part split on ":"; the last element is the size and,
+ *                            when more than one element is present, the first is the label
+ * @param {String} name Expected label for this position ("min", "avg" or "max")
+ *
+ * @return {Number} Parsed chunk size
+ * @throws {Error} If the label does not match name or the size is not an integer
+ */
+function parseSub (sub, name) {
+  if (sub.length > 1 && sub[0] !== name) {
+    throw new Error('Parameter order must be min:avg:max')
+  }
+
+  return parseChunkSize(sub[sub.length - 1], name)
+}
+
+/**
+ * Parses a chunk size given as a string of decimal digits
+ *
+ * @param {String} str Size to parse
+ * @param {String} name Parameter name used in the error message
+ *
+ * @return {Number} Parsed chunk size
+ * @throws {Error} If str is not made up of decimal digits only
+ */
+function parseChunkSize (str, name) {
+  // A bare parseInt would accept trailing garbage ("12abc" -> 12) and,
+  // without a radix, hex input ("0x10" -> 16)
+  if (!/^\d+$/.test(str)) {
+    throw new Error(`Parameter ${name} must be an integer`)
+  }
+
+  return parseInt(str, 10)
+}
+
 exports.parseIpfsPath = parseIpfsPath
 exports.resolvePath = resolvePath
+exports.parseChunkerString = parseChunkerString
diff --git a/src/http/api/resources/files.js b/src/http/api/resources/files.js
index 0370c4b0c2..645ddaceb7 100644
--- a/src/http/api/resources/files.js
+++ b/src/http/api/resources/files.js
@@ -221,7 +221,8 @@ exports.add = {
       onlyHash: request.query['only-hash'],
       hashAlg: request.query['hash'],
       wrapWithDirectory: request.query['wrap-with-directory'],
-      pin: request.query.pin
+      pin: request.query.pin,
+      chunker: request.query['chunker']
     }
 
     const aborter = abortable()
diff --git a/test/core/utils.js b/test/core/utils.js
index b5c84b15c1..0e669d5389 100644
--- a/test/core/utils.js
+++ b/test/core/utils.js
@@ -157,4 +157,71 @@ describe('utils', () => {
       })
     })
   })
+
+  describe('parseChunkerString', () => {
+    it('handles an empty string', () => {
+      const options = utils.parseChunkerString('')
+      expect(options).to.have.property('chunker').to.equal('fixed')
+    })
+
+    it('handles a null chunker string', () => {
+      const options = utils.parseChunkerString(null)
+      expect(options).to.have.property('chunker').to.equal('fixed')
+    })
+
+    it('parses a fixed size string', () => {
+      const options = utils.parseChunkerString('size-512')
+      expect(options).to.have.property('chunker').to.equal('fixed')
+      expect(options)
+        .to.have.property('chunkerOptions')
+        .to.have.property('maxChunkSize')
+        .to.equal(512)
+    })
+
+    it('parses a rabin string without size', () => {
+      const options = utils.parseChunkerString('rabin')
+      expect(options).to.have.property('chunker').to.equal('rabin')
+      expect(options)
+        .to.have.property('chunkerOptions')
+        .to.have.property('avgChunkSize')
+    })
+
+    it('parses a rabin string with only avg size', () => {
+      const options = utils.parseChunkerString('rabin-512')
+      expect(options).to.have.property('chunker').to.equal('rabin')
+      expect(options)
+        .to.have.property('chunkerOptions')
+        .to.have.property('avgChunkSize')
+        .to.equal(512)
+    })
+
+    it('parses a rabin string with min, avg, and max', () => {
+      const options = utils.parseChunkerString('rabin-42-92-184')
+      expect(options).to.have.property('chunker').to.equal('rabin')
+      expect(options).to.have.property('chunkerOptions')
+      expect(options.chunkerOptions).to.have.property('minChunkSize').to.equal(42)
+      expect(options.chunkerOptions).to.have.property('avgChunkSize').to.equal(92)
+      expect(options.chunkerOptions).to.have.property('maxChunkSize').to.equal(184)
+    })
+
+    it('throws an error for an unsupported chunker type', () => {
+      expect(() => utils.parseChunkerString('fake-512')).to.throw(Error, 'unrecognized chunker option: fake-512')
+    })
+
+    it('throws an error for a non-integer fixed size', () => {
+      expect(() => utils.parseChunkerString('size-abc')).to.throw(Error, 'Parameter size must be an integer')
+    })
+
+    it('throws an error for a fixed size with trailing parts', () => {
+      expect(() => utils.parseChunkerString('size-512-1024')).to.throw(Error, 'Parameter size must be an integer')
+    })
+
+    it('throws an error for a rabin-like name without a separator', () => {
+      expect(() => utils.parseChunkerString('rabinfoo')).to.throw(Error, 'unrecognized chunker option: rabinfoo')
+    })
+
+    it('throws an error for an incorrect rabin format string', () => {
+      expect(() => utils.parseChunkerString('rabin-42-92')).to.throw(Error, 'incorrect format')
+    })
+  })
 })