From 4dd2b72fd32c9d614cba56b18e9a62cc6b65fed5 Mon Sep 17 00:00:00 2001 From: achingbrain Date: Tue, 12 Mar 2019 12:01:25 +0000 Subject: [PATCH] fix: correct hamt structure when modifying deep sub-shards If you have a shard like: ``` F1 A0 C8myfile ``` And you add a file that causes `C8myfile` to become a subshard, previously you would end up with this: ``` F1 A0 C8 C8 B3myfile 82newfile ``` This PR ensures you get the correct structure: ``` F1 A0 C8 B3myfile 82newfile ``` When we update a shard, we re-create a portion of the shard in order to update the existing structure, this avoids loading the entire shard which could be expensive. Previously we weren't descending into the newly created sub-shard to add the correct portion to the existing shard. Fixes #45 --- src/core/utils/add-link.js | 49 +++++++++++++++++++++++++++------ test/helpers/create-shard.js | 2 +- test/write.spec.js | 53 +++++++++++++++++++++++++++++++++++- 3 files changed, 94 insertions(+), 10 deletions(-) diff --git a/src/core/utils/add-link.js b/src/core/utils/add-link.js index ec4f9d8..3304adb 100644 --- a/src/core/utils/add-link.js +++ b/src/core/utils/add-link.js @@ -8,6 +8,7 @@ const CID = require('cids') const waterfall = require('async/waterfall') const DirSharded = require('ipfs-unixfs-importer/src/importer/dir-sharded') const series = require('async/series') +const whilst = require('async/whilst') const log = require('debug')('ipfs:mfs:core:utils:add-link') const UnixFS = require('ipfs-unixfs') const { @@ -127,22 +128,22 @@ const addToShardedDirectory = (context, options, callback) => { return waterfall([ (cb) => generatePath(context, options.name, options.parent, cb), ({ rootBucket, path }, cb) => { - updateShard(context, path, { + updateShard(context, path.reverse(), { name: options.name, cid: options.cid, size: options.size - }, options, (err, result = {}) => cb(err, { rootBucket, ...result })) + }, 0, options, (err, result = {}) => cb(err, { rootBucket, ...result })) }, ({ rootBucket, node }, cb) => updateHamtDirectory(context, node.links, rootBucket, options, cb) ], callback) } -const updateShard = (context, positions, child, options, callback) => { +const updateShard = (context, positions, child, index, options, callback) => { const { bucket, prefix, node - } = positions.pop() + } = positions[index] const link = node.links .find(link => link.name.substring(0, 2) === prefix && link.name !== `${prefix}${child.name}`) @@ -163,9 +164,41 @@ const updateShard = (context, positions, child, options, callback) => { multihash: child.cid.buffer }], {}, done), ({ node: { links: [ shard ] } }, done) => { - return context.ipld.get(shard.cid, (err, result) => { - done(err, { cid: shard.cid, node: result && result.value }) - }) + let position = 0 + + // step through the shard until we find the newly created sub-shard + return whilst( + () => position < positions.length - 1, + (next) => { + const shardPrefix = positions[position].prefix + + log(`Prefix at position ${position} is ${shardPrefix} - shard.name ${shard.name}`) + + if (shard.name.substring(0, 2) !== shardPrefix) { + return next(new Error(`Unexpected prefix ${shard.name} !== ${shardPrefix}, position ${position}`)) + } + + position++ + + context.ipld.get(shard.cid, (err, result) => { + if (err) { + return next(err) + } + + if (position < positions.length) { + const nextPrefix = positions[position].prefix + const nextShard = result.value.links.find(link => link.name.substring(0, 2) === nextPrefix) + + if (nextShard) { + shard = nextShard + } + } + + next(err, { cid: result && result.cid, node: result && result.value }) + }) + }, + done + ) }, (result, cb) => updateShardParent(context, bucket, node, link.name, result.node, result.cid, prefix, options, cb) ], cb) @@ -175,7 +208,7 @@ const updateShard = (context, positions, child, options, callback) => { log(`Descending into sub-shard ${link.name} for ${child.name}`) return waterfall([ - (cb) => updateShard(context, positions, child, options, cb), + (cb) => updateShard(context, positions, child, index + 1, options, cb), (result, cb) => updateShardParent(context, bucket, node, link.name, result.node, result.cid, prefix, options, cb) ], cb) } diff --git a/test/helpers/create-shard.js b/test/helpers/create-shard.js index a195ced..dc2cbcf 100644 --- a/test/helpers/create-shard.js +++ b/test/helpers/create-shard.js @@ -17,7 +17,7 @@ const createShard = (ipld, files, shardSplitThreshold = 10) => { }), collect((err, files) => { if (err) { - return reject(files) + return reject(err) } const dir = files[files.length - 1] diff --git a/test/write.spec.js b/test/write.spec.js index 03e862c..881ad01 100644 --- a/test/write.spec.js +++ b/test/write.spec.js @@ -15,9 +15,11 @@ const { createMfs, cidAtPath, createShardedDirectory, - createTwoShards + createTwoShards, + createShard } = require('./helpers') const CID = require('cids') +const crypto = require('crypto') let fs, tempWrite @@ -881,4 +883,53 @@ describe('write', () => { expect(stats.type).to.equal('hamt-sharded-directory') expect(updatedDirCid.toBaseEncodedString()).to.deep.equal(dirWithAllFiles.toBaseEncodedString()) }) + + it('results in the same hash as a sharded directory created by the importer when causing a subshard of a subshard to be created', async function () { + this.timeout(60000) + + const dir = `/some-dir-${Date.now()}` + + const nodeGrContent = Buffer.from([0, 1, 2, 3, 4]) + const superModuleContent = Buffer.from([5, 6, 7, 8, 9]) + + const dirCid = await createShard(mfs.ipld, [{ + path: `${dir}/node-gr`, + content: nodeGrContent + }, { + path: `${dir}/yanvoidmodule`, + content: crypto.randomBytes(5) + }, { + path: `${dir}/methodify`, + content: crypto.randomBytes(5) + }, { + path: `${dir}/fis-msprd-style-loader_0_13_1`, + content: crypto.randomBytes(5) + }, { + path: `${dir}/js-form`, + content: crypto.randomBytes(5) + }, { + path: `${dir}/vivanov-sliceart`, + content: crypto.randomBytes(5) + }], 1) + + await mfs.cp(`/ipfs/${dirCid.toBaseEncodedString()}`, dir) + + await mfs.write(`${dir}/supermodule_test`, superModuleContent, { + create: true + }) + + await mfs.stat(`${dir}/supermodule_test`) + await mfs.stat(`${dir}/node-gr`) + + expect(await mfs.read(`${dir}/node-gr`)).to.deep.equal(nodeGrContent) + expect(await mfs.read(`${dir}/supermodule_test`)).to.deep.equal(superModuleContent) + + await mfs.rm(`${dir}/supermodule_test`) + + try { + await mfs.stat(`${dir}/supermodule_test`) + } catch (err) { + expect(err.message).to.contain('not exist') + } + }) })