Skip to content
This repository has been archived by the owner on Feb 12, 2024. It is now read-only.

feat: Add option to specify chunking algorithm when adding files #1469

Merged
merged 7 commits into from
Aug 24, 2018
7 changes: 6 additions & 1 deletion src/cli/commands/files/add.js
Original file line number Diff line number Diff line change
Expand Up @@ -135,6 +135,10 @@ module.exports = {
default: false,
describe: 'Only chunk and hash, do not write'
},
chunker: {
default: 'size-262144',
describe: 'Chunking algorithm to use, formatted like [size-{size}, rabin, rabin-{avg}, rabin-{min}-{avg}-{max}]'
},
'enable-sharding-experiment': {
type: 'boolean',
default: false
Expand Down Expand Up @@ -194,7 +198,8 @@ module.exports = {
onlyHash: argv.onlyHash,
hashAlg: argv.hash,
wrapWithDirectory: argv.wrapWithDirectory,
pin: argv.pin
pin: argv.pin,
chunker: argv.chunker
}

if (options.enableShardingExperiment && utils.isDaemonOn()) {
Expand Down
11 changes: 9 additions & 2 deletions src/core/components/files.js
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ const OtherBuffer = require('buffer').Buffer
const CID = require('cids')
const toB58String = require('multihashes').toB58String
const errCode = require('err-code')
const parseChunkerString = require('../utils').parseChunkerString

const WRAPPER = 'wrapper/'

Expand Down Expand Up @@ -148,12 +149,18 @@ class AddHelper extends Duplex {
}

module.exports = function files (self) {
function _addPullStream (options) {
function _addPullStream (options = {}) {
let chunkerOptions
try {
chunkerOptions = parseChunkerString(options.chunker)
} catch (err) {
return pull.map(() => { throw err })
}
const opts = Object.assign({}, {
shardSplitThreshold: self._options.EXPERIMENTAL.sharding
? 1000
: Infinity
}, options)
}, options, chunkerOptions)

if (opts.hashAlg && opts.cidVersion !== 1) {
opts.cidVersion = 1
Expand Down
81 changes: 81 additions & 0 deletions src/core/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -110,5 +110,86 @@ const resolvePath = promisify(function (objectAPI, ipfsPaths, callback) {
}, callback)
})

/**
 * Parses chunker string into options used by DAGBuilder in ipfs-unixfs-engine
 *
 * A falsy value (undefined, null, '') selects the fixed-size chunker without
 * any explicit chunkerOptions.
 *
 * @param {String} chunker Chunker algorithm supported formats:
 *                    "size-{size}"
 *                    "rabin"
 *                    "rabin-{avg}"
 *                    "rabin-{min}-{avg}-{max}"
 *
 * @return {Object} Chunker options for DAGBuilder: `{ chunker }` or
 *                  `{ chunker, chunkerOptions }`
 * @throws {Error} If the value is not a string, names an unknown algorithm,
 *                 or carries a size that is not a plain decimal integer
 */
function parseChunkerString (chunker) {
  if (!chunker) {
    return {
      chunker: 'fixed'
    }
  }

  // A truthy non-string (e.g. a number produced by CLI argument parsing) has
  // no startsWith(); report it as a bad option instead of a TypeError.
  if (typeof chunker !== 'string') {
    throw new Error(`Unrecognized chunker option: ${String(chunker)}`)
  }

  if (chunker.startsWith('size-')) {
    // Take everything after the prefix so trailing segments such as
    // "size-512-1024" are rejected rather than silently dropped.
    const sizeStr = chunker.slice('size-'.length)
    if (!/^\d+$/.test(sizeStr)) {
      throw new Error('Chunker parameter size must be an integer')
    }
    return {
      chunker: 'fixed',
      chunkerOptions: {
        maxChunkSize: parseInt(sizeStr, 10)
      }
    }
  }

  // Require the exact name or the "rabin-" prefix so that e.g. "rabinfoo"
  // is not mistaken for the default rabin chunker.
  if (chunker === 'rabin' || chunker.startsWith('rabin-')) {
    return {
      chunker: 'rabin',
      chunkerOptions: parseRabinString(chunker)
    }
  }

  throw new Error(`Unrecognized chunker option: ${chunker}`)
}

/**
 * Parses rabin chunker string
 *
 * @param {String} chunker Chunker algorithm supported formats:
 *            "rabin"
 *            "rabin-{avg}"
 *            "rabin-{min}-{avg}-{max}"
 *
 * @return {Object} rabin chunker options: always `avgChunkSize`, plus
 *                  `minChunkSize` and `maxChunkSize` for the four-part form
 * @throws {Error} If the string does not match one of the formats above
 */
function parseRabinString (chunker) {
  const formatError = 'Incorrect chunker format (expected "rabin" "rabin-[avg]" or "rabin-[min]-[avg]-[max]")'
  const options = {}
  const parts = chunker.split('-')

  // The leading segment must be exactly "rabin"; otherwise inputs such as
  // "rabinfoo" or "rabinfoo-512" would be accepted purely on segment count.
  if (parts[0] !== 'rabin') {
    throw new Error(formatError)
  }

  switch (parts.length) {
    case 1:
      // Bare "rabin": use the default average chunk size
      options.avgChunkSize = 262144
      break
    case 2:
      options.avgChunkSize = parseChunkSize(parts[1], 'avg')
      break
    case 4:
      options.minChunkSize = parseChunkSize(parts[1], 'min')
      options.avgChunkSize = parseChunkSize(parts[2], 'avg')
      options.maxChunkSize = parseChunkSize(parts[3], 'max')
      break
    default:
      throw new Error(formatError)
  }

  return options
}

/**
 * Parses a single chunk size parameter
 *
 * Only plain decimal digit strings are accepted, so values like "12abc",
 * "1.5" or "0x10" are rejected instead of being partially parsed.
 *
 * @param {String} str Text of the size parameter
 * @param {String} name Parameter name used in the error message (e.g. "avg")
 *
 * @return {Number} The parsed size
 * @throws {Error} If `str` is not a decimal integer
 */
function parseChunkSize (str, name) {
  if (!/^\d+$/.test(str)) {
    throw new Error(`Chunker parameter ${name} must be an integer`)
  }

  return parseInt(str, 10)
}

exports.parseIpfsPath = parseIpfsPath
exports.resolvePath = resolvePath
exports.parseChunkerString = parseChunkerString
6 changes: 4 additions & 2 deletions src/http/api/resources/files.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,7 +157,8 @@ exports.add = {
'raw-leaves': Joi.boolean(),
'only-hash': Joi.boolean(),
pin: Joi.boolean().default(true),
'wrap-with-directory': Joi.boolean()
'wrap-with-directory': Joi.boolean(),
chunker: Joi.string()
})
// TODO: Necessary until validate "recursive", "stream-channels" etc.
.options({ allowUnknown: true })
Expand Down Expand Up @@ -221,7 +222,8 @@ exports.add = {
onlyHash: request.query['only-hash'],
hashAlg: request.query['hash'],
wrapWithDirectory: request.query['wrap-with-directory'],
pin: request.query.pin
pin: request.query.pin,
chunker: request.query.chunker
}

const aborter = abortable()
Expand Down
67 changes: 67 additions & 0 deletions test/core/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -157,4 +157,71 @@ describe('utils', () => {
})
})
})

describe('parseChunkerString', () => {
it('handles an empty string', () => {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please add test cases for error cases like an unsupported chunker.

const options = utils.parseChunkerString('')
expect(options).to.have.property('chunker').to.equal('fixed')
})

it('handles a null chunker string', () => {
  const options = utils.parseChunkerString(null)
  expect(options).to.have.property('chunker').to.equal('fixed')
})

it('parses a fixed size string', () => {
  const options = utils.parseChunkerString('size-512')
  expect(options).to.have.property('chunker').to.equal('fixed')
  expect(options)
    .to.have.property('chunkerOptions')
    .to.have.property('maxChunkSize')
    .to.equal(512)
})

it('parses a rabin string without size', () => {
  const options = utils.parseChunkerString('rabin')
  expect(options).to.have.property('chunker').to.equal('rabin')
  expect(options)
    .to.have.property('chunkerOptions')
    .to.have.property('avgChunkSize')
})

it('parses a rabin string with only avg size', () => {
  const options = utils.parseChunkerString('rabin-512')
  expect(options).to.have.property('chunker').to.equal('rabin')
  expect(options)
    .to.have.property('chunkerOptions')
    .to.have.property('avgChunkSize')
    .to.equal(512)
})

it('parses a rabin string with min, avg, and max', () => {
  const options = utils.parseChunkerString('rabin-42-92-184')
  expect(options).to.have.property('chunker').to.equal('rabin')
  expect(options).to.have.property('chunkerOptions')
  expect(options.chunkerOptions).to.have.property('minChunkSize').to.equal(42)
  expect(options.chunkerOptions).to.have.property('avgChunkSize').to.equal(92)
  expect(options.chunkerOptions).to.have.property('maxChunkSize').to.equal(184)
})

it('throws an error for unsupported chunker type', () => {
  const fn = () => utils.parseChunkerString('fake-512')
  expect(fn).to.throw(Error)
})

it('throws an error for incorrect format string', () => {
  // 'fixed' is not an accepted prefix (the fixed-size chunker is spelled
  // 'size-{size}'), so this exercises the unrecognized-chunker branch.
  const fn = () => utils.parseChunkerString('fixed-abc')
  expect(fn).to.throw(Error)
})

it('throws an error for non integer size parameter', () => {
  // Covers the 'size-' branch, which the 'fixed-abc' case above never reaches.
  const fn = () => utils.parseChunkerString('size-abc')
  expect(fn).to.throw(Error)
})

it('throws an error for incorrect rabin format string', () => {
  const fn = () => utils.parseChunkerString('rabin-1-2-3-4')
  expect(fn).to.throw(Error)
})

it('throws an error for non integer rabin parameters', () => {
  const fn = () => utils.parseChunkerString('rabin-abc')
  expect(fn).to.throw(Error)
})
})
})