From 524e22d732fe77050e9f81f162dac702cf805b4f Mon Sep 17 00:00:00 2001
From: achingbrain
Date: Fri, 18 May 2018 19:36:38 +0100
Subject: [PATCH 1/5] chore: use same hash for empty node as go

---
 test/importer.js     | 2 +-
 test/with-dag-api.js | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/test/importer.js b/test/importer.js
index a5685a03..dfbabbe5 100644
--- a/test/importer.js
+++ b/test/importer.js
@@ -214,7 +214,7 @@ module.exports = (repo) => {
           expect(err).to.not.exist()
           expect(nodes.length).to.be.eql(1)
           // always yield empty node
-          expect(mh.toB58String(nodes[0].multihash)).to.be.eql('QmfJMCvenrj4SKKRc48DYPxwVdS44qCUCqqtbqhJuSTWXP')
+          expect(mh.toB58String(nodes[0].multihash)).to.be.eql('QmbFMke1KXqnYyBBWxB74N4c5SBnJMVAiMNRcGu6x1AwQH')
           done()
         }))
     })
diff --git a/test/with-dag-api.js b/test/with-dag-api.js
index e93a9ee2..1f68442c 100644
--- a/test/with-dag-api.js
+++ b/test/with-dag-api.js
@@ -225,7 +225,7 @@ describe('with dag-api', function () {
           expect(err).to.not.exist()
           expect(nodes.length).to.be.eql(1)
           // always yield empty node
-          expect(mh.toB58String(nodes[0].multihash)).to.be.eql('QmfJMCvenrj4SKKRc48DYPxwVdS44qCUCqqtbqhJuSTWXP')
+          expect(mh.toB58String(nodes[0].multihash)).to.be.eql('QmbFMke1KXqnYyBBWxB74N4c5SBnJMVAiMNRcGu6x1AwQH')
           done()
         }))
     })

From e0d0beb5ab7c70eaa65cd54abe5ec9729e239ba3 Mon Sep 17 00:00:00 2001
From: achingbrain
Date: Fri, 25 May 2018 18:12:06 +0100
Subject: [PATCH 2/5] chore: return empty buffer for files with no data

Part of making the hashes emitted by js-ipfs the same as go-ipfs was
unsetting the data attribute of UnixFS metadata objects when the data
buffer had 0 length.

ipfs.files.get converts a pull stream of files into a readable stream
of files, and converts each file's content pull stream too. If an empty
buffer is not emitted by the content pull stream for a file with no
data, then no file is written out to the filesystem.

This was a fun bug to track down. This module used to emit empty
buffers for UnixFS metadata with no data property, but I removed that
behaviour as part of #208 because there was no test that failed without
it, so here we reinstate it and add a test.
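As a rough sketch of the behaviour being restored (the emptyFileHash and
ipld names below are assumed for illustration, not taken from this
changeset), exporting an empty file by its hash should now yield a content
stream that emits a single zero-length buffer instead of ending without
emitting anything:

    const pull = require('pull-stream')
    const exporter = require('ipfs-unixfs-engine').exporter

    pull(
      exporter(emptyFileHash, ipld),
      // drain each exported file's content stream into an array of buffers
      pull.asyncMap((file, cb) => pull(file.content, pull.collect(cb))),
      pull.collect((err, contents) => {
        // with this change contents[0] is [ <zero-length Buffer> ] rather
        // than [], so a consumer like ipfs.files.get still writes the
        // (empty) file to disk
      })
    )

A readable-stream consumer that only creates a file once a chunk arrives
would otherwise silently skip empty files, which is the bug described above.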
---
 package.json                            |   2 +-
 src/exporter/file.js                    |   6 +-
 test/exporter.js                        | 103 ++++++++++++++++++
 .../dir-with-empty-files/empty-file.txt |   0
 4 files changed, 105 insertions(+), 6 deletions(-)
 create mode 100644 test/fixtures/dir-with-empty-files/empty-file.txt

diff --git a/package.json b/package.json
index 64ce31d7..12aa589d 100644
--- a/package.json
+++ b/package.json
@@ -60,7 +60,7 @@
     "cids": "~0.5.3",
     "deep-extend": "~0.6.0",
     "ipfs-unixfs": "~0.1.14",
-    "ipld": "^0.17.2",
+    "ipld": "~0.17.2",
     "ipld-dag-pb": "~0.14.4",
     "left-pad": "^1.3.0",
     "lodash": "^4.17.10",
diff --git a/src/exporter/file.js b/src/exporter/file.js
index 6a0d801f..6ca27cc9 100644
--- a/src/exporter/file.js
+++ b/src/exporter/file.js
@@ -63,15 +63,11 @@ function streamBytes (dag, node, fileSize, offset, length) {
   let streamPosition = 0

   function getData ({ node, start }) {
-    if (!node || !node.data) {
-      return
-    }
-
     try {
       const file = UnixFS.unmarshal(node.data)

       if (!file.data) {
-        return
+        return Buffer.alloc(0)
       }

       const block = extractDataFromBlock(file.data, start, offset, end)
diff --git a/test/exporter.js b/test/exporter.js
index d165e7e4..5fd4adc8 100644
--- a/test/exporter.js
+++ b/test/exporter.js
@@ -14,6 +14,12 @@ const CID = require('cids')
 const loadFixture = require('aegir/fixtures')
 const doUntil = require('async/doUntil')
 const waterfall = require('async/waterfall')
+const series = require('async/series')
+const fs = require('fs')
+const path = require('path')
+const push = require('pull-pushable')
+const toPull = require('stream-to-pull-stream')
+const toStream = require('pull-stream-to-stream')
 const unixFSEngine = require('./../src')
 const exporter = unixFSEngine.exporter

@@ -64,6 +70,54 @@ module.exports = (repo) => {
     })
   }

+  function addTestDirectory ({directory, strategy = 'balanced', maxChunkSize}, callback) {
+    const input = push()
+    const dirName = path.basename(directory)
+
+    pull(
+      input,
+      pull.map((file) => {
+        const ipfsPath = `${path.join(dirName, file.substring(directory.length))}`
+
+        return {
+          path: ipfsPath,
+          content: toPull.source(fs.createReadStream(file))
+        }
+      }),
+      importer(ipld, {
+        strategy,
+        maxChunkSize
+      }),
+      pull.collect(callback)
+    )
+
+    const listFiles = (directory, depth, stream, cb) => {
+      waterfall([
+        (done) => fs.stat(directory, done),
+        (stats, done) => {
+          if (stats.isDirectory()) {
+            return waterfall([
+              (done) => fs.readdir(directory, done),
+              (children, done) => {
+                series(
+                  children.map(child => (next) => listFiles(path.join(directory, child), depth + 1, stream, next)),
+                  done
+                )
+              }
+            ], done)
+          }
+
+          stream.push(directory)
+          done()
+        }
+      ], cb)
+    }
+
+    listFiles(directory, 0, input, () => {
+      input.end()
+    })
+  }
+
   function checkBytesThatSpanBlocks (strategy, cb) {
     const bytesInABlock = 262144
     const bytes = Buffer.alloc(bytesInABlock + 100, 0)
@@ -517,6 +571,55 @@ module.exports = (repo) => {
       checkBytesThatSpanBlocks('trickle', done)
     })

+    it('exports a directory containing an empty file whose content gets turned into a ReadableStream', function (done) {
+      // replicates the behaviour of ipfs.files.get
+      waterfall([
+        (cb) => addTestDirectory({
+          directory: path.join(__dirname, 'fixtures', 'dir-with-empty-files')
+        }, cb),
+        (result, cb) => {
+          const dir = result.pop()
+
+          pull(
+            exporter(dir.multihash, ipld),
+            pull.map((file) => {
+              if (file.content) {
+                file.content = toStream.source(file.content)
+                file.content.pause()
+              }
+
+              return file
+            }),
+            pull.collect((error, files) => {
+              if (error) {
+                return cb(error)
+              }
+
+              series(
+                files
+                  .filter(file => Boolean(file.content))
+                  .map(file => {
+                    return (done) => {
+                      if (file.content) {
+                        file.content
+                          .pipe(toStream.sink(pull.collect((error, bufs) => {
+                            expect(error).to.not.exist()
+                            expect(bufs.length).to.equal(1)
+                            expect(bufs[0].length).to.equal(0)
+
+                            done()
+                          })))
+                      }
+                    }
+                  }),
+                cb
+              )
+            })
+          )
+        }
+      ], done)
+    })
+
     // TODO: This needs for the stores to have timeouts,
     // otherwise it is impossible to predict if a file doesn't
     // really exist
diff --git a/test/fixtures/dir-with-empty-files/empty-file.txt b/test/fixtures/dir-with-empty-files/empty-file.txt
new file mode 100644
index 00000000..e69de29b

From bbb403c79ee99ffc1bd715dd7a5338ed937ed33f Mon Sep 17 00:00:00 2001
From: achingbrain
Date: Wed, 6 Jun 2018 15:09:57 +0100
Subject: [PATCH 3/5] chore: handle graphs with data on internal and leaf nodes

---
 src/exporter/file.js |  19 +++++---
 test/exporter.js     | 101 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 113 insertions(+), 7 deletions(-)

diff --git a/src/exporter/file.js b/src/exporter/file.js
index 6ca27cc9..675224c1 100644
--- a/src/exporter/file.js
+++ b/src/exporter/file.js
@@ -60,7 +60,6 @@ function streamBytes (dag, node, fileSize, offset, length) {
   }

   const end = offset + length
-  let streamPosition = 0

   function getData ({ node, start }) {
     try {
@@ -70,18 +69,24 @@ function streamBytes (dag, node, fileSize, offset, length) {
         return Buffer.alloc(0)
       }

-      const block = extractDataFromBlock(file.data, start, offset, end)
-
-      streamPosition += block.length
-
-      return block
+      return extractDataFromBlock(file.data, start, offset, end)
     } catch (error) {
       throw new Error(`Failed to unmarshal node - ${error.message}`)
     }
   }

+  // as we step through the children, keep track of where we are in the stream
+  // so we can filter out nodes we're not interested in
+  let streamPosition = 0
+
   function visitor ({ node }) {
     const file = UnixFS.unmarshal(node.data)
+    const nodeHasData = Boolean(file.data && file.data.length)
+
+    // handle case where data is present on leaf nodes and internal nodes
+    if (nodeHasData && node.links.length) {
+      streamPosition += file.data.length
+    }

     // work out which child nodes contain the requested data
     const filteredLinks = node.links
@@ -96,7 +101,7 @@ function streamBytes (dag, node, fileSize, offset, length) {

         return child
       })
-      .filter((child, index) => {
+      .filter((child) => {
         return (offset >= child.start && offset < child.end) || // child has offset byte
           (end > child.start && end <= child.end) || // child has end byte
           (offset < child.start && end > child.end) // child is between offset and end bytes
diff --git a/test/exporter.js b/test/exporter.js
index 5fd4adc8..82ceff6f 100644
--- a/test/exporter.js
+++ b/test/exporter.js
@@ -14,12 +14,17 @@ const CID = require('cids')
 const loadFixture = require('aegir/fixtures')
 const doUntil = require('async/doUntil')
 const waterfall = require('async/waterfall')
+const parallel = require('async/parallel')
 const series = require('async/series')
 const fs = require('fs')
 const path = require('path')
 const push = require('pull-pushable')
 const toPull = require('stream-to-pull-stream')
 const toStream = require('pull-stream-to-stream')
+const {
+  DAGNode,
+  DAGLink
+} = require('ipld-dag-pb')
 const unixFSEngine = require('./../src')
 const exporter = unixFSEngine.exporter

@@ -635,6 +640,79 @@ module.exports = (repo) => {
         })
       )
     })
+
+    it('exports file with data on internal and leaf nodes', function (done) {
+      waterfall([
+        (cb) => createAndPersistNode(ipld, 'raw', [0x04, 0x05, 0x06, 0x07], [], cb),
+        (leaf, cb) => createAndPersistNode(ipld, 'file', [0x00, 0x01, 0x02, 0x03], [
+          leaf
+        ], cb),
+        (file, cb) => {
+          pull(
+            exporter(file.multihash, ipld),
+            pull.asyncMap((file, cb) => readFile(file, cb)),
+            pull.through(buffer => {
+              expect(buffer).to.deep.equal(Buffer.from([0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07]))
+            }),
+            pull.collect(cb)
+          )
+        }
+      ], done)
+    })
+
+    it('exports file with data on some internal and leaf nodes', function (done) {
+      // create a file node with three children:
+      // where:
+      //   i = internal node without data
+      //   d = internal node with data
+      //   l = leaf node with data
+      //       i
+      //     / | \
+      //    l  d  i
+      //       |   \
+      //       l    l
+      waterfall([
+        (cb) => {
+          // create leaves
+          parallel([
+            (next) => createAndPersistNode(ipld, 'raw', [0x00, 0x01, 0x02, 0x03], [], next),
+            (next) => createAndPersistNode(ipld, 'raw', [0x08, 0x09, 0x10, 0x11], [], next),
+            (next) => createAndPersistNode(ipld, 'raw', [0x12, 0x13, 0x14, 0x15], [], next)
+          ], cb)
+        },
+        (leaves, cb) => {
+          parallel([
+            (next) => createAndPersistNode(ipld, 'raw', [0x04, 0x05, 0x06, 0x07], [leaves[1]], next),
+            (next) => createAndPersistNode(ipld, 'raw', null, [leaves[2]], next)
+          ], (error, internalNodes) => {
+            if (error) {
+              return cb(error)
+            }
+
+            createAndPersistNode(ipld, 'file', null, [
+              leaves[0],
+              internalNodes[0],
+              internalNodes[1]
+            ], cb)
+          })
+        },
+        (file, cb) => {
+          pull(
+            exporter(file.multihash, ipld),
+            pull.asyncMap((file, cb) => readFile(file, cb)),
+            pull.through(buffer => {
+              expect(buffer).to.deep.equal(
+                Buffer.from([
+                  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+                  0x08, 0x09, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15
+                ])
+              )
+            }),
+            pull.collect(cb)
+          )
+        }
+      ], done)
+    })
   })
 }

@@ -670,3 +748,26 @@ function readFile (file, done) {
     })
   )
 }
+
+function createAndPersistNode (ipld, type, data, children, callback) {
+  const file = new UnixFS(type, data ? Buffer.from(data) : undefined)
+  const links = []
+
+  children.forEach(child => {
+    const leaf = UnixFS.unmarshal(child.data)
+
+    file.addBlockSize(leaf.fileSize())
+
+    links.push(new DAGLink('', child.size, child.multihash))
+  })
+
+  DAGNode.create(file.marshal(), links, (error, node) => {
+    if (error) {
+      return callback(error)
+    }
+
+    ipld.put(node, {
+      cid: new CID(node.multihash)
+    }, (error) => callback(error, node))
+  })
+}

From cd570b7451b423b1cf8fcd930ce07f78686b7769 Mon Sep 17 00:00:00 2001
From: achingbrain
Date: Wed, 6 Jun 2018 15:27:38 +0100
Subject: [PATCH 4/5] chore: handle offsets when data is on internal and leaf nodes

---
 package.json         |  2 +-
 src/exporter/file.js |  6 ++++++
 test/exporter.js     | 21 +++++++++++++++++++++
 3 files changed, 28 insertions(+), 1 deletion(-)

diff --git a/package.json b/package.json
index 12aa589d..9547d625 100644
--- a/package.json
+++ b/package.json
@@ -59,7 +59,7 @@
     "bs58": "^4.0.1",
     "cids": "~0.5.3",
     "deep-extend": "~0.6.0",
-    "ipfs-unixfs": "~0.1.14",
+    "ipfs-unixfs": "~0.1.15",
     "ipld": "~0.17.2",
     "ipld-dag-pb": "~0.14.4",
     "left-pad": "^1.3.0",
diff --git a/src/exporter/file.js b/src/exporter/file.js
index 675224c1..4ee290fb 100644
--- a/src/exporter/file.js
+++ b/src/exporter/file.js
@@ -139,6 +139,12 @@ function streamBytes (dag, node, fileSize, offset, length) {
 function extractDataFromBlock (block, streamPosition, begin, end) {
   const blockLength = block.length

+  if (begin >= streamPosition + blockLength) {
+    // If begin is after the start of the block, return an empty block
+    // This can happen when internal nodes contain data
+    return Buffer.alloc(0)
+  }
+
   if (end - streamPosition < blockLength) {
     // If the end byte is in the current block, truncate the block to the end byte
     block = block.slice(0, end - streamPosition)
diff --git a/test/exporter.js b/test/exporter.js
index 82ceff6f..c3ce4a48 100644
--- a/test/exporter.js
+++ b/test/exporter.js
@@ -713,6 +713,27 @@ module.exports = (repo) => {
         }
       ], done)
     })
+
+    it('exports file with data on internal and leaf nodes with an offset that only fetches data from leaf nodes', function (done) {
+      waterfall([
+        (cb) => createAndPersistNode(ipld, 'raw', [0x04, 0x05, 0x06, 0x07], [], cb),
+        (leaf, cb) => createAndPersistNode(ipld, 'file', [0x00, 0x01, 0x02, 0x03], [
+          leaf
+        ], cb),
+        (file, cb) => {
+          pull(
+            exporter(file.multihash, ipld, {
+              offset: 4
+            }),
+            pull.asyncMap((file, cb) => readFile(file, cb)),
+            pull.through(buffer => {
+              expect(buffer).to.deep.equal(Buffer.from([0x04, 0x05, 0x06, 0x07]))
+            }),
+            pull.collect(cb)
+          )
+        }
+      ], done)
+    })
   })
 }

From 1a70af6a099a39e81592af68aecc30b8abf3c4e8 Mon Sep 17 00:00:00 2001
From: achingbrain
Date: Fri, 8 Jun 2018 18:11:26 +0100
Subject: [PATCH 5/5] chore: fix sinon spies in browser tests

---
 src/exporter/file.js | 4 ++--
 test/exporter.js     | 4 +---
 test/importer.js     | 4 ++--
 test/node.js         | 4 ++--
 4 files changed, 7 insertions(+), 9 deletions(-)

diff --git a/src/exporter/file.js b/src/exporter/file.js
index 4ee290fb..0da35e8c 100644
--- a/src/exporter/file.js
+++ b/src/exporter/file.js
@@ -30,7 +30,7 @@ module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth,
   }

   if (length === 0) {
-    return pull.empty()
+    return pull.once(Buffer.alloc(0))
   }

   if (!offset) {
@@ -56,7 +56,7 @@ module.exports = (node, name, path, pathRest, resolve, size, dag, parent, depth,

 function streamBytes (dag, node, fileSize, offset, length) {
   if (offset === fileSize || length === 0) {
-    return pull.empty()
+    return pull.once(Buffer.alloc(0))
   }

   const end = offset + length
diff --git a/test/exporter.js b/test/exporter.js
index c3ce4a48..1bca514c 100644
--- a/test/exporter.js
+++ b/test/exporter.js
@@ -82,10 +82,8 @@ module.exports = (repo) => {
     pull(
       input,
       pull.map((file) => {
-        const ipfsPath = `${path.join(dirName, file.substring(directory.length))}`
-
         return {
-          path: ipfsPath,
+          path: path.join(dirName, path.basename(file)),
           content: toPull.source(fs.createReadStream(file))
         }
       }),
diff --git a/test/importer.js b/test/importer.js
index dfbabbe5..f5b1edba 100644
--- a/test/importer.js
+++ b/test/importer.js
@@ -7,7 +7,7 @@ const extend = require('deep-extend')
 const chai = require('chai')
 chai.use(require('dirty-chai'))
 const expect = chai.expect
-const sinon = require('sinon')
+const spy = require('sinon/lib/sinon/spy')
 const BlockService = require('ipfs-block-service')
 const pull = require('pull-stream')
 const mh = require('multihashes')
@@ -456,7 +456,7 @@ module.exports = (repo) => {
     })

     it('will call an optional progress function', (done) => {
-      options.progress = sinon.spy()
+      options.progress = spy()

       pull(
         pull.values([{
diff --git a/test/node.js b/test/node.js
index 8bc82510..064428e9 100644
--- a/test/node.js
+++ b/test/node.js
@@ -10,8 +10,8 @@
 const mkdirp = require('mkdirp')
 const series = require('async/series')

 describe('IPFS UnixFS Engine', () => {
-  const repoExample = path.join(process.cwd(), '/test/test-repo')
-  const repoTests = path.join(os.tmpdir(), '/unixfs-tests-' + Date.now())
+  const repoExample = path.join(process.cwd(), 'test', 'test-repo')
+  const repoTests = path.join(os.tmpdir(), 'unixfs-tests-' + Date.now())
   const repo = new IPFSRepo(repoTests)