Skip to content

Commit

Permalink
feat(core): add option to specify chunking algorithm
Browse files Browse the repository at this point in the history
This allows the chunking algorithm and its options to be specified when adding files.
The chunker and its options are specified the same way as in go-ipfs; the following formats are supported:
default, size-{size}, rabin, rabin-{avg}, rabin-{min}-{avg}-{max}
This is required to achieve parity with go-ipfs.

Fixes ipfs#1283

License: MIT
Signed-off-by: Dan Ordille <dordille@gmail.com>
  • Loading branch information
dordille authored and alanshaw committed Aug 17, 2018
1 parent 9368f37 commit 84b0425
Show file tree
Hide file tree
Showing 5 changed files with 147 additions and 4 deletions.
7 changes: 6 additions & 1 deletion src/cli/commands/files/add.js
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ module.exports = {
default: false,
describe: 'Only chunk and hash, do not write'
},
chunker: {
default: 'default',
describe: 'Chunking algorithm to use, formatted like [default, size-{size}, rabin, rabin-{avg}, rabin-{min}-{avg}-{max}]'
},
'enable-sharding-experiment': {
type: 'boolean',
default: false
Expand Down Expand Up @@ -195,7 +199,8 @@ module.exports = {
onlyHash: argv.onlyHash,
hashAlg: argv.hash,
wrapWithDirectory: argv.wrapWithDirectory,
pin: argv.pin
pin: argv.pin,
chunker: argv.chunker
}

if (options.enableShardingExperiment && utils.isDaemonOn()) {
Expand Down
6 changes: 4 additions & 2 deletions src/core/components/files.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const OtherBuffer = require('buffer').Buffer
const CID = require('cids')
const toB58String = require('multihashes').toB58String
const errCode = require('err-code')
const parseChunkerString = require('../utils').parseChunkerString

const WRAPPER = 'wrapper/'

Expand Down Expand Up @@ -148,12 +149,13 @@ class AddHelper extends Duplex {
}

module.exports = function files (self) {
function _addPullStream (options) {
function _addPullStream (options = {}) {
const chunkerOptions = parseChunkerString(options.chunker)
const opts = Object.assign({}, {
shardSplitThreshold: self._options.EXPERIMENTAL.sharding
? 1000
: Infinity
}, options)
}, options, chunkerOptions)

if (opts.hashAlg && opts.cidVersion !== 1) {
opts.cidVersion = 1
Expand Down
88 changes: 88 additions & 0 deletions src/core/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,5 +110,93 @@ const resolvePath = promisify(function (objectAPI, ipfsPaths, callback) {
}, callback)
})

/**
 * Parses chunker string into options used by DAGBuilder in ipfs-unixfs-engine
 *
 * A missing/empty value or "default" selects the fixed-size chunker with no
 * extra options. "size-{size}" selects the fixed-size chunker with
 * `maxChunkSize` set to `size`. "rabin…" strings are delegated to
 * `parseRabinString`.
 *
 * @param {String} chunker Chunker algorithm supported formats:
 *                    "default" ("")
 *                    "size-{size}",
 *                    "rabin"
 *                    "rabin-{avg}"
 *                    "rabin-{min}-{avg}-{max}"
 *
 * @return {Object} Chunker options for DAGBuilder
 * @throws {Error} If the string is not one of the supported formats or a
 *                 size is not a decimal integer
 */
function parseChunkerString (chunker) {
  if (!chunker || chunker === 'default') {
    return {
      chunker: 'fixed'
    }
  }

  // A truthy non-string (e.g. an array produced by a repeated query
  // parameter) has no `startsWith`; report it as an unknown option instead
  // of crashing with a TypeError.
  if (typeof chunker !== 'string') {
    throw new Error(`unrecognized chunker option: ${chunker}`)
  }

  if (chunker.startsWith('size-')) {
    const sizeStr = chunker.slice('size-'.length)
    // Only plain decimal digits are accepted: parseInt alone would silently
    // accept trailing garbage ("512abc") or hex prefixes.
    if (!/^\d+$/.test(sizeStr)) {
      throw new Error('Parameter size must be an integer')
    }
    return {
      chunker: 'fixed',
      chunkerOptions: {
        maxChunkSize: parseInt(sizeStr, 10)
      }
    }
  }

  // Match "rabin" exactly or the "rabin-" prefix so that strings such as
  // "rabinfoo" are rejected rather than treated as the rabin chunker.
  if (chunker === 'rabin' || chunker.startsWith('rabin-')) {
    return {
      chunker: 'rabin',
      chunkerOptions: parseRabinString(chunker)
    }
  }

  throw new Error(`unrecognized chunker option: ${chunker}`)
}

/**
 * Parses rabin chunker string
 *
 * With no sizes given, `avgChunkSize` defaults to 262144. With one size it
 * is used as `avgChunkSize`. With three sizes they are read, in order, as
 * `minChunkSize`, `avgChunkSize` and `maxChunkSize` (each parsed by
 * `parseSub`, which also tolerates a "name:value" form).
 *
 * @param {String}   chunker Chunker algorithm supported formats:
 *                            "rabin"
 *                            "rabin-{avg}"
 *                            "rabin-{min}-{avg}-{max}"
 *
 * @return {Object}  rabin chunker options
 * @throws {Error} If the string does not have one of the supported shapes
 *                 or a size is not a decimal integer
 */
function parseRabinString (chunker) {
  const formatError = 'incorrect format (expected "rabin" "rabin-[avg]" or "rabin-[min]-[avg]-[max]")'
  const options = {}
  const parts = chunker.split('-')

  // The first segment must be exactly "rabin"; otherwise e.g. "rabinfoo"
  // would be accepted as the bare "rabin" form.
  if (parts[0] !== 'rabin') {
    throw new Error(formatError)
  }

  switch (parts.length) {
    case 1:
      options.avgChunkSize = 262144
      break
    case 2:
      // Digits only: parseInt alone would accept "512abc" or hex prefixes.
      if (!/^\d+$/.test(parts[1])) {
        throw new Error('Parameter avg must be an integer')
      }
      options.avgChunkSize = parseInt(parts[1], 10)
      break
    case 4:
      options.minChunkSize = parseSub(parts[1].split(':'), 'min')
      options.avgChunkSize = parseSub(parts[2].split(':'), 'avg')
      options.maxChunkSize = parseSub(parts[3].split(':'), 'max')
      break
    default:
      throw new Error(formatError)
  }

  return options
}

/**
 * Parses one size component of a rabin chunker string.
 *
 * `sub` is the component split on ":"; it is either `[value]` or
 * `[label, value]`. When a label is present it must equal `name`. The last
 * element is taken as the value.
 *
 * @param {Array<String>} sub  Component split on ":"
 * @param {String}        name Expected parameter name ("min", "avg" or "max")
 *
 * @return {Number} The parsed size
 * @throws {Error} If the label does not match `name` or the value is not a
 *                 decimal integer
 */
function parseSub (sub, name) {
  if (sub.length > 1 && sub[0] !== name) {
    throw new Error('Parameter order must be min:avg:max')
  }

  const value = sub[sub.length - 1]
  // Digits only: parseInt alone would accept trailing garbage ("12abc") and,
  // without a radix, hex prefixes.
  if (!/^\d+$/.test(String(value))) {
    throw new Error(`Parameter ${name} must be an integer`)
  }

  return parseInt(value, 10)
}

exports.parseIpfsPath = parseIpfsPath
exports.resolvePath = resolvePath
exports.parseChunkerString = parseChunkerString
3 changes: 2 additions & 1 deletion src/http/api/resources/files.js
Original file line number Diff line number Diff line change
Expand Up @@ -221,7 +221,8 @@ exports.add = {
onlyHash: request.query['only-hash'],
hashAlg: request.query['hash'],
wrapWithDirectory: request.query['wrap-with-directory'],
pin: request.query.pin
pin: request.query.pin,
chunker: request.query['chunker']
}

const aborter = abortable()
Expand Down
47 changes: 47 additions & 0 deletions test/core/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,51 @@ describe('utils', () => {
})
})
})

describe('parseChunkerString', () => {
  // Empty and missing values both fall back to the fixed-size chunker.
  it('handles an empty string', () => {
    const parsed = utils.parseChunkerString('')
    expect(parsed).to.have.property('chunker').to.equal('fixed')
  })

  it('handles a null chunker string', () => {
    const parsed = utils.parseChunkerString(null)
    expect(parsed).to.have.property('chunker').to.equal('fixed')
  })

  // "size-{size}" keeps the fixed chunker and carries the size as maxChunkSize.
  it('parses a fixed size string', () => {
    const parsed = utils.parseChunkerString('size-512')
    expect(parsed).to.have.property('chunker').to.equal('fixed')
    expect(parsed).to.have.property('chunkerOptions').to.have.property('maxChunkSize').to.equal(512)
  })

  // Bare "rabin" must still yield an avgChunkSize (its value is not pinned here).
  it('parses a rabin string without size', () => {
    const parsed = utils.parseChunkerString('rabin')
    expect(parsed).to.have.property('chunker').to.equal('rabin')
    expect(parsed).to.have.property('chunkerOptions').to.have.property('avgChunkSize')
  })

  it('parses a rabin string with only avg size', () => {
    const parsed = utils.parseChunkerString('rabin-512')
    expect(parsed).to.have.property('chunker').to.equal('rabin')
    expect(parsed).to.have.property('chunkerOptions').to.have.property('avgChunkSize').to.equal(512)
  })

  // Three sizes map, in order, to min / avg / max.
  it('parses a rabin string with min, avg, and max', () => {
    const parsed = utils.parseChunkerString('rabin-42-92-184')
    expect(parsed).to.have.property('chunker').to.equal('rabin')
    expect(parsed).to.have.property('chunkerOptions')
    const rabinOptions = parsed.chunkerOptions
    expect(rabinOptions).to.have.property('minChunkSize').to.equal(42)
    expect(rabinOptions).to.have.property('avgChunkSize').to.equal(92)
    expect(rabinOptions).to.have.property('maxChunkSize').to.equal(184)
  })
})
})

0 comments on commit 84b0425

Please sign in to comment.