From 1aba3f320aeda43dded0463739a70445428020ac Mon Sep 17 00:00:00 2001
From: achingbrain
Date: Thu, 11 Nov 2021 17:11:58 +0000
Subject: [PATCH 1/4] fix: do not lose files when writing files into
 subshards that contain other subshards

When writing a file into a HAMT shard, we hash the filename to figure
out where in the shard the file should be placed. If that hash places
the file in an existing subshard that itself contains another subshard,
we must also populate the nested subshard's children; otherwise they
will not be present when we calculate the new CID for the subshard and
they will be lost.

Fixes #3921
---
(A standalone sketch of the subshard-population logic added here
follows the final patch in this series.)

 .../interface-ipfs-core/src/files/write.js   | 44 +++++++++++++++++
 .../src/utils/dump-shard.js                  | 41 ++++++++++++++++
 .../src/components/files/utils/add-link.js   |  4 +-
 .../src/components/files/utils/hamt-utils.js | 48 +++++++++++++++----
 .../ipfs-core/src/components/files/write.js  | 13 +----
 5 files changed, 127 insertions(+), 23 deletions(-)
 create mode 100644 packages/interface-ipfs-core/src/utils/dump-shard.js

diff --git a/packages/interface-ipfs-core/src/files/write.js b/packages/interface-ipfs-core/src/files/write.js
index af243c2b55..9004ebca14 100644
--- a/packages/interface-ipfs-core/src/files/write.js
+++ b/packages/interface-ipfs-core/src/files/write.js
@@ -14,6 +14,7 @@ import { randomBytes, randomStream } from 'iso-random-stream'
 import all from 'it-all'
 import isShardAtPath from '../utils/is-shard-at-path.js'
 import * as raw from 'multiformats/codecs/raw'
+import map from 'it-map'
 /**
  * @typedef {import('ipfsd-ctl').Factory} Factory
  */
@@ -903,6 +904,49 @@ export function testWrite (factory, options) {
           long: true
         }))).to.eventually.not.be.empty()
       })
+
+      it('writes a file to a sub-shard of a shard that contains another sub-shard', async () => {
+        const data = Uint8Array.from([0, 1, 2])
+
+        await ipfs.files.mkdir('/hamttest-mfs')
+
+        const files = [
+          'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-1398.txt',
+          'vivanov-sliceart',
+          'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-1230.txt',
+          'methodify',
+          'fis-msprd-style-loader_0_13_1',
+          'js-form',
+          'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-1181.txt',
+          'node-gr',
+          'yanvoidmodule',
+          'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-1899.txt',
+          'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-372.txt',
+          'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-1032.txt',
+          'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-1293.txt',
+          'file-0000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000000-766.txt'
+        ]
+
+        for (const path of files) {
+          await ipfs.files.write(`/hamttest-mfs/${path}`, data, {
+            shardSplitThreshold: 0,
+            create: true
+          })
+        }
+
+        const beforeFiles = await all(map(ipfs.files.ls('/hamttest-mfs'), (entry) => entry.name))
+
+        expect(beforeFiles).to.have.lengthOf(files.length)
+
+        await ipfs.files.write('/hamttest-mfs/supermodule_test', data, {
+          shardSplitThreshold: 0,
+          create: true
+        })
+
+        const afterFiles = await all(map(ipfs.files.ls('/hamttest-mfs'), (entry) => entry.name))
+
+        expect(afterFiles).to.have.lengthOf(beforeFiles.length + 1)
+      })
     })
   })
 }
diff --git a/packages/interface-ipfs-core/src/utils/dump-shard.js b/packages/interface-ipfs-core/src/utils/dump-shard.js
new file mode 100644
index 0000000000..49f271909a
--- /dev/null
+++ b/packages/interface-ipfs-core/src/utils/dump-shard.js
@@ -0,0 +1,41 @@
+import { UnixFS } from 'ipfs-unixfs'
+
+/**
+ * @param {string} path
+ * @param {import('ipfs-core-types').IPFS} ipfs
+ */
+export default async function dumpShard (path, ipfs) {
+  const stats = await ipfs.files.stat(path)
+  const { value: node } = await ipfs.dag.get(stats.cid)
+  const entry = UnixFS.unmarshal(node.Data)
+
+  if (entry.type !== 'hamt-sharded-directory') {
+    throw new Error('Not a shard')
+  }
+
+  await dumpSubShard(stats.cid, ipfs)
+}
+
+/**
+ * @param {import('multiformats/cid').CID} cid
+ * @param {import('ipfs-core-types').IPFS} ipfs
+ * @param {string} prefix
+ */
+async function dumpSubShard (cid, ipfs, prefix = '') {
+  const { value: node } = await ipfs.dag.get(cid)
+  const entry = UnixFS.unmarshal(node.Data)
+
+  if (entry.type !== 'hamt-sharded-directory') {
+    throw new Error('Not a shard')
+  }
+
+  for (const link of node.Links) {
+    const { value: subNode } = await ipfs.dag.get(link.Hash)
+    const subEntry = UnixFS.unmarshal(subNode.Data)
+    console.info(`${prefix}${link.Name}`, ' ', subEntry.type) // eslint-disable-line no-console
+
+    if (link.Name.length === 2) {
+      await dumpSubShard(link.Hash, ipfs, `${prefix}  `)
+    }
+  }
+}
diff --git a/packages/ipfs-core/src/components/files/utils/add-link.js b/packages/ipfs-core/src/components/files/utils/add-link.js
index af3658e93a..4ae0543913 100644
--- a/packages/ipfs-core/src/components/files/utils/add-link.js
+++ b/packages/ipfs-core/src/components/files/utils/add-link.js
@@ -339,7 +339,7 @@ const addFileToShardedDirectory = async (context, options) => {
       // subshard hasn't been loaded, descend to the next level of the HAMT
       if (!path[index]) {
         log(`Loaded new subshard ${segment.prefix}`)
-        await recreateHamtLevel(subShard.Links, rootBucket, segment.bucket, parseInt(segment.prefix, 16))
+        await recreateHamtLevel(context, subShard.Links, rootBucket, segment.bucket, parseInt(segment.prefix, 16))

         const position = await rootBucket._findNewBucketAndPos(file.name)

@@ -355,7 +355,7 @@ const addFileToShardedDirectory = async (context, options) => {
       const nextSegment = path[index]

       // add next levels worth of links to bucket
-      await addLinksToHamtBucket(subShard.Links, nextSegment.bucket, rootBucket)
+      await addLinksToHamtBucket(context, subShard.Links, nextSegment.bucket, rootBucket)

       nextSegment.node = subShard
     }
diff --git a/packages/ipfs-core/src/components/files/utils/hamt-utils.js b/packages/ipfs-core/src/components/files/utils/hamt-utils.js
index 8bc22937b1..c05a1c84c1 100644
--- a/packages/ipfs-core/src/components/files/utils/hamt-utils.js
+++ b/packages/ipfs-core/src/components/files/utils/hamt-utils.js
@@ -72,12 +72,13 @@ export const updateHamtDirectory = async (context, links, bucket, options) => {
 }

 /**
+ * @param {MfsContext} context
  * @param {PBLink[]} links
  * @param {Bucket} rootBucket
  * @param {Bucket} parentBucket
  * @param {number} positionAtParent
  */
-export const recreateHamtLevel = async (links, rootBucket, parentBucket, positionAtParent) => {
+export const recreateHamtLevel = async (context, links, rootBucket, parentBucket, positionAtParent) => {
   // recreate this level of the HAMT
   const bucket = new Bucket({
     hash: rootBucket._options.hash,
     bits: rootBucket._options.bits
   }, parentBucket, positionAtParent)

   parentBucket._putObjectAt(positionAtParent, bucket)

-  await addLinksToHamtBucket(links, bucket, rootBucket)
+  await addLinksToHamtBucket(context, links, bucket, rootBucket)

   return bucket
 }
@@ -99,28 +100,57 @@ export const recreateInitialHamtLevel = async (links) => {
     bits: hamtBucketBits
   })

-  await addLinksToHamtBucket(links, bucket, bucket)
+  // populate sub bucket but do not recurse as we do not want to pull whole shard in
+  await Promise.all(
+    links.map(async link => {
+      const linkName = (link.Name || '')
+
+      if (linkName.length === 2) {
+        const pos = parseInt(linkName, 16)
+
+        const subBucket = new Bucket({
+          hash: bucket._options.hash,
+          bits: bucket._options.bits
+        }, bucket, pos)
+        bucket._putObjectAt(pos, subBucket)
+
+        return Promise.resolve()
+      }
+
+      return bucket.put(linkName.substring(2), {
+        size: link.Tsize,
+        cid: link.Hash
+      })
+    })
+  )

   return bucket
 }

 /**
+ * @param {MfsContext} context
  * @param {PBLink[]} links
  * @param {Bucket} bucket
  * @param {Bucket} rootBucket
  */
-export const addLinksToHamtBucket = async (links, bucket, rootBucket) => {
+export const addLinksToHamtBucket = async (context, links, bucket, rootBucket) => {
   await Promise.all(
-    links.map(link => {
+    links.map(async link => {
       const linkName = (link.Name || '')

       if (linkName.length === 2) {
+        log('Populating sub bucket', linkName)
         const pos = parseInt(linkName, 16)
+        const block = await context.repo.blocks.get(link.Hash)
+        const node = dagPB.decode(block)

-        bucket._putObjectAt(pos, new Bucket({
+        const subBucket = new Bucket({
           hash: rootBucket._options.hash,
           bits: rootBucket._options.bits
-        }, bucket, pos))
+        }, bucket, pos)
+        bucket._putObjectAt(pos, subBucket)
+
+        await addLinksToHamtBucket(context, node.Links, subBucket, rootBucket)

         return Promise.resolve()
       }
@@ -213,7 +243,7 @@ export const generatePath = async (context, fileName, rootNode) => {
       if (!path[i + 1]) {
         log(`Loaded new subshard ${segment.prefix}`)
-        await recreateHamtLevel(node.Links, rootBucket, segment.bucket, parseInt(segment.prefix, 16))
+        await recreateHamtLevel(context, node.Links, rootBucket, segment.bucket, parseInt(segment.prefix, 16))

         const position = await rootBucket._findNewBucketAndPos(fileName)

         // i--
@@ -229,7 +259,7 @@ export const generatePath = async (context, fileName, rootNode) => {
       const nextSegment = path[i + 1]

       // add intermediate links to bucket
-      await addLinksToHamtBucket(node.Links, nextSegment.bucket, rootBucket)
+      await addLinksToHamtBucket(context, node.Links, nextSegment.bucket, rootBucket)

       nextSegment.node = node
     }
diff --git a/packages/ipfs-core/src/components/files/write.js b/packages/ipfs-core/src/components/files/write.js
index 0c6293865d..7152bef9f4 100644
--- a/packages/ipfs-core/src/components/files/write.js
+++ b/packages/ipfs-core/src/components/files/write.js
@@ -3,7 +3,6 @@
 import { importer } from 'ipfs-unixfs-importer'
 import { decode } from '@ipld/dag-pb'
-import { sha256, sha512 } from 'multiformats/hashes/sha2'
 import { createStat } from './stat.js'
 import { createMkdir } from './mkdir.js'
 import { addLink } from './utils/add-link.js'
@@ -293,17 +292,7 @@ const write = async (context, source, destination, options) => {
     mtime = destination.unixfs.mtime
   }

-  let hasher
-  switch (options.hashAlg) {
-    case 'sha2-256':
-      hasher = sha256
-      break
-    case 'sha2-512':
-      hasher = sha512
-      break
-    default:
-      throw new Error(`TODO vmx 2021-03-31: Proper error message for unsupported hash algorithms like ${options.hashAlg}`)
-  }
+  const hasher = await context.hashers.getHasher(options.hashAlg)

   const result = await last(importer([{
     content: content,

From b66f46e65dbe86b00c47370a8401de8223351ae2 Mon Sep 17 00:00:00 2001
From: achingbrain
Date: Thu, 11 Nov 2021 17:30:41 +0000
Subject: [PATCH 2/4] chore: http-client-name-api does not depend on ipfs

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 919af880c5..f9b405b19c 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -380,7 +380,7 @@ jobs:
            deps: ipfs-http-client@$PWD/packages/ipfs-http-client/dist,ipfs@$PWD/packages/ipfs/dist
          - name: ipfs-http-client name api
            repo: https://github.com/ipfs-examples/js-ipfs-http-client-name-api.git
-           deps: ipfs@$PWD/packages/ipfs/dist,ipfs-http-client@$PWD/packages/ipfs-http-client/dist
+           deps: ipfs-http-client@$PWD/packages/ipfs-http-client/dist
          - name: ipfs-http-client upload file
            repo: https://github.com/ipfs-examples/js-ipfs-http-client-upload-file.git
            deps: ipfs@$PWD/packages/ipfs/dist,ipfs-http-client@$PWD/packages/ipfs-http-client/dist

From 94a19639e5fdd38c3173574c70aba40dd89dc612 Mon Sep 17 00:00:00 2001
From: achingbrain
Date: Thu, 11 Nov 2021 18:12:10 +0000
Subject: [PATCH 3/4] chore: fix example tests

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index f9b405b19c..9c493dd547 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -347,7 +347,7 @@ jobs:
            deps: ipfs-core@$PWD/packages/ipfs-core/dist
          - name: ipfs browser service worker
            repo: https://github.com/ipfs-examples/js-ipfs-browser-service-worker.git
-           deps: ipfs-core@$PWD/packages/ipfs-core/dist,ipfs-message-port-client/dist@$PWD/packages/ipfs-message-port-client/dist,ipfs-message-port-protocol@$PWD/packages/ipfs-message-port-protocol/dist,ipfs-message-port-server@$PWD/packages/ipfs-message-port-server/dist
+           deps: ipfs-core@$PWD/packages/ipfs-core/dist,ipfs-message-port-client@$PWD/packages/ipfs-message-port-client/dist,ipfs-message-port-protocol@$PWD/packages/ipfs-message-port-protocol/dist,ipfs-message-port-server@$PWD/packages/ipfs-message-port-server/dist
          - name: ipfs browser sharing across tabs
            repo: https://github.com/ipfs-examples/js-ipfs-browser-sharing-node-across-tabs.git
            deps: ipfs-core@$PWD/packages/ipfs-core/dist,ipfs-message-port-client@$PWD/packages/ipfs-message-port-client/dist,ipfs-message-port-server@$PWD/packages/ipfs-message-port-server/dist

From d054929378ae8f5d47f3662c53a80904fbeea608 Mon Sep 17 00:00:00 2001
From: achingbrain
Date: Fri, 12 Nov 2021 10:36:59 +0000
Subject: [PATCH 4/4] chore: update example deps

---
 .github/workflows/test.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9c493dd547..1105264c01 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -365,7 +365,7 @@ jobs:
            deps: ipfs-core@$PWD/packages/ipfs-core/dist
          - name: ipfs custom ipfs repo
            repo: https://github.com/ipfs-examples/js-ipfs-custom-ipfs-repo.git
-           deps: ipfs@$PWD/packages/ipfs/dist
+           deps: ipfs-core@$PWD/packages/ipfs-core/dist
          - name: ipfs custom ipld formats
            repo: https://github.com/ipfs-examples/js-ipfs-custom-ipld-formats.git
            deps: ipfs-core@$PWD/packages/ipfs-core/dist,ipfs-daemon@$PWD/packages/ipfs-daemon/dist,ipfs-http-client@$PWD/packages/ipfs-http-client/dist
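
---

Appendix: a standalone sketch of the subshard-population logic from
PATCH 1/4. This is a minimal restatement for reference, not the shipped
implementation: populateBucket and loadNode are hypothetical names, and
loadNode stands in for the patch's context.repo.blocks.get plus
dagPB.decode. The Bucket constructor, bucket.put and the
_putObjectAt/_options internals are the same hamt-sharding APIs the
patch itself uses.

import { Bucket } from 'hamt-sharding'

/**
 * Recreate one level of a HAMT shard in memory.
 *
 * @param {{ Name?: string, Tsize?: number, Hash: any }[]} links
 * @param {Bucket<any>} bucket - the level being populated
 * @param {Bucket<any>} rootBucket - the root of the HAMT
 * @param {(cid: any) => Promise<{ Links: any[] }>} loadNode - hypothetical
 *   block loader standing in for context.repo.blocks.get + dagPB.decode
 */
async function populateBucket (links, bucket, rootBucket, loadNode) {
  await Promise.all(
    links.map(async link => {
      const linkName = link.Name || ''

      if (linkName.length === 2) {
        // a two-character name is a subshard prefix - before the fix an
        // empty bucket was inserted here, so the subshard's children were
        // missing when the parent's new CID was calculated and were lost
        const pos = parseInt(linkName, 16)
        const subBucket = new Bucket({
          hash: rootBucket._options.hash,
          bits: rootBucket._options.bits
        }, bucket, pos)
        bucket._putObjectAt(pos, subBucket)

        // the fix: load the subshard's node and recurse so its children
        // exist in the in-memory HAMT before any CIDs are recalculated
        const node = await loadNode(link.Hash)
        await populateBucket(node.Links, subBucket, rootBucket, loadNode)
        return
      }

      // longer names are directory entries: the first two characters are
      // the hash prefix, the remainder is the actual file name
      await bucket.put(linkName.substring(2), {
        size: link.Tsize,
        cid: link.Hash
      })
    })
  )
}

This recursion is why addLinksToHamtBucket and recreateHamtLevel now
take the MfsContext as their first argument: a shard level cannot be
rebuilt faithfully without access to the blockstore. Note that
recreateInitialHamtLevel deliberately does not recurse; per the comment
in the patch, that is to avoid pulling the whole shard into memory.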