Skip to content
This repository was archived by the owner on Aug 12, 2020. It is now read-only.

Commit cd3c12a

Browse files
authored
Merge pull request #164 from ipfs/feat/sharded-dirs
sharded dirs and interactive flush
2 parents 0348e79 + 29b2740 commit cd3c12a

28 files changed

+2186
-92
lines changed

README.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -154,6 +154,8 @@ Nodes.
154154

155155
In the second argument of the importer constructor you can specify the following options:
156156

157+
* `wrap` (boolean, defaults to false): if true, a wrapping node will be created
158+
* `shardSplitThreshold` (positive integer, defaults to 1000): the number of directory entries above which we decide to use a sharding directory builder (instead of the default flat one)
157159
* `chunker` (string, defaults to `"fixed"`): the chunking strategy. Now only supports `"fixed"`
158160
* `chunkerOptions` (object, optional): the options for the chunker. Defaults to an object with the following properties:
159161
* `maxChunkSize` (positive integer, defaults to `262144`): the maximum chunk size for the `fixed` chunker.
@@ -164,6 +166,9 @@ In the second argument of the importer constructor you can specify the following
164166
* `maxChildrenPerNode` (positive integer, defaults to `174`): the maximum children per node for the `balanced` and `trickle` DAG builder strategies
165167
* `layerRepeat` (positive integer, defaults to 4): (only applicable to the `trickle` DAG builder strategy). The maximum repetition of parent nodes for each layer of the tree.
166168
* `reduceSingleLeafToSelf` (boolean, defaults to `false`): optimization for, when reducing a set of nodes with one node, reduce it to that node.
169+
* `dirBuilder` (object): the options for the directory builder
170+
* `hamt` (object): the options for the HAMT sharded directory builder
171+
* `bits` (positive integer, defaults to `5`): the number of bits at each bucket of the HAMT
167172

168173
### Example Exporter
169174

package.json

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,8 @@
4848
"pre-commit": "^1.2.2",
4949
"pull-generate": "^2.2.0",
5050
"pull-zip": "^2.0.1",
51-
"rimraf": "^2.6.1"
51+
"rimraf": "^2.6.1",
52+
"split": "^1.0.0"
5253
},
5354
"dependencies": {
5455
"async": "^2.1.5",
@@ -58,8 +59,10 @@
5859
"ipld-dag-pb": "^0.11.0",
5960
"ipld-resolver": "^0.11.0",
6061
"is-ipfs": "^0.3.0",
62+
"left-pad": "^1.1.3",
6163
"lodash": "^4.17.4",
62-
"multihashes": "^0.4.4",
64+
"multihashes": "^0.4.5",
65+
"multihashing-async": "^0.4.0",
6366
"pull-batch": "^1.0.0",
6467
"pull-block": "^1.1.0",
6568
"pull-cat": "^1.1.11",
@@ -69,7 +72,8 @@
6972
"pull-pushable": "^2.0.1",
7073
"pull-stream": "^3.5.0",
7174
"pull-traverse": "^1.0.3",
72-
"pull-write": "^1.1.1"
75+
"pull-write": "^1.1.1",
76+
"sparse-array": "^1.3.1"
7377
},
7478
"contributors": [
7579
"David Dias <daviddias.p@gmail.com>",
@@ -83,4 +87,4 @@
8387
"jbenet <juan@benet.ai>",
8488
"nginnever <ginneversource@gmail.com>"
8589
]
86-
}
90+
}

src/builder/builder.js

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ const defaultOptions = {
2222
module.exports = function (createChunker, ipldResolver, createReducer, _options) {
2323
const options = extend({}, defaultOptions, _options)
2424

25-
return function (source, files) {
25+
return function (source) {
2626
return function (items, cb) {
2727
parallel(items.map((item) => (cb) => {
2828
if (!item.content) {
@@ -33,7 +33,6 @@ module.exports = function (createChunker, ipldResolver, createReducer, _options)
3333
}
3434
if (node) {
3535
source.push(node)
36-
files.push(node)
3736
}
3837
cb()
3938
})
@@ -46,7 +45,6 @@ module.exports = function (createChunker, ipldResolver, createReducer, _options)
4645
}
4746
if (node) {
4847
source.push(node)
49-
files.push(node)
5048
}
5149
cb()
5250
})

src/builder/create-build-stream.js

Lines changed: 3 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,14 @@
33
const pullPushable = require('pull-pushable')
44
const pullWrite = require('pull-write')
55

6-
module.exports = function createBuildStream (createStrategy, ipldResolver, flushTree, options) {
7-
const files = []
8-
6+
module.exports = function createBuildStream (createStrategy, ipldResolver, options) {
97
const source = pullPushable()
108

119
const sink = pullWrite(
12-
createStrategy(source, files),
10+
createStrategy(source),
1311
null,
1412
options.highWaterMark,
15-
(err) => {
16-
if (err) {
17-
source.end(err)
18-
return // early
19-
}
20-
21-
flushTree(files, ipldResolver, source, source.end)
22-
}
13+
(err) => source.end(err)
2314
)
2415

2516
return {

src/builder/index.js

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -16,10 +16,9 @@ const defaultOptions = {
1616
reduceSingleLeafToSelf: false
1717
}
1818

19-
module.exports = function (Chunker, ipldResolver, flushTree, _options) {
19+
module.exports = function (Chunker, ipldResolver, _options) {
2020
assert(Chunker, 'Missing chunker creator function')
2121
assert(ipldResolver, 'Missing IPLD Resolver')
22-
assert(flushTree, 'Missing flushTree argument')
2322

2423
const options = Object.assign({}, defaultOptions, _options)
2524

@@ -29,5 +28,5 @@ module.exports = function (Chunker, ipldResolver, flushTree, _options) {
2928

3029
const createStrategy = Builder(Chunker, ipldResolver, reducer, options)
3130

32-
return createBuildStream(createStrategy, ipldResolver, flushTree, options)
31+
return createBuildStream(createStrategy, ipldResolver, options)
3332
}

src/exporter/clean-multihash.js

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
'use strict'
2+
3+
const mh = require('multihashes')
4+
5+
module.exports = (multihash) => {
6+
if (Buffer.isBuffer(multihash)) {
7+
return mh.toB58String(multihash)
8+
}
9+
10+
return multihash
11+
}

src/exporter/dir-flat.js

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
'use strict'
2+
3+
const path = require('path')
4+
const pull = require('pull-stream')
5+
const paramap = require('pull-paramap')
6+
const CID = require('cids')
7+
const cat = require('pull-cat')
8+
9+
// Logic to export a unixfs directory.
10+
module.exports = dirExporter
11+
12+
function dirExporter (node, name, ipldResolver, resolve, parent) {
13+
const dir = {
14+
path: name,
15+
hash: node.multihash
16+
}
17+
18+
return cat([
19+
pull.values([dir]),
20+
pull(
21+
pull.values(node.links),
22+
pull.map((link) => ({
23+
path: path.join(name, link.name),
24+
hash: link.multihash
25+
})),
26+
paramap((item, cb) => ipldResolver.get(new CID(item.hash), (err, n) => {
27+
if (err) {
28+
return cb(err)
29+
}
30+
31+
cb(null, resolve(n.value, item.path, ipldResolver, name, parent))
32+
})),
33+
pull.flatten()
34+
)
35+
])
36+
}

src/exporter/dir-hamt-sharded.js

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,48 @@
1+
'use strict'
2+
3+
const path = require('path')
4+
const pull = require('pull-stream')
5+
const paramap = require('pull-paramap')
6+
const CID = require('cids')
7+
const cat = require('pull-cat')
8+
const cleanHash = require('./clean-multihash')
9+
10+
// Logic to export a unixfs directory.
11+
module.exports = shardedDirExporter
12+
13+
function shardedDirExporter (node, name, ipldResolver, resolve, parent) {
14+
let dir
15+
if (!parent || parent.path !== name) {
16+
dir = [{
17+
path: name,
18+
hash: cleanHash(node.multihash)
19+
}]
20+
}
21+
22+
return cat([
23+
pull.values(dir),
24+
pull(
25+
pull.values(node.links),
26+
pull.map((link) => {
27+
// remove the link prefix (2 chars for the bucket index)
28+
let p = link.name.substring(2)
29+
// another sharded dir or file?
30+
p = p ? path.join(name, p) : name
31+
32+
return {
33+
name: link.name,
34+
path: p,
35+
hash: link.multihash
36+
}
37+
}),
38+
paramap((item, cb) => ipldResolver.get(new CID(item.hash), (err, n) => {
39+
if (err) {
40+
return cb(err)
41+
}
42+
43+
cb(null, resolve(n.value, item.path, ipldResolver, (dir && dir[0]) || parent))
44+
})),
45+
pull.flatten()
46+
)
47+
])
48+
}

src/exporter/index.js

Lines changed: 4 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -1,58 +1,23 @@
11
'use strict'
22

3-
const traverse = require('pull-traverse')
43
const pull = require('pull-stream')
54
const CID = require('cids')
65
const isIPFS = require('is-ipfs')
76

8-
const util = require('./../util')
9-
const switchType = util.switchType
10-
const cleanMultihash = util.cleanMultihash
7+
const resolve = require('./resolve').resolve
8+
const cleanMultihash = require('./clean-multihash')
119

12-
const dirExporter = require('./dir')
13-
const fileExporter = require('./file')
14-
15-
module.exports = (hash, ipldResolver, options) => {
10+
module.exports = (hash, ipldResolver) => {
1611
if (!isIPFS.multihash(hash)) {
1712
return pull.error(new Error('not valid multihash'))
1813
}
1914

2015
hash = cleanMultihash(hash)
21-
options = options || {}
22-
23-
function visitor (item) {
24-
if (!item.hash) {
25-
// having no hash means that this visitor got a file object
26-
// which needs no further resolving.
27-
// No further resolving means that the visitor does not
28-
// need to do anyting else, so he's returning
29-
// an empty stream
30-
31-
// TODO: perhaps change the pull streams construct.
32-
// Instead of traversing with a visitor, consider recursing.
33-
return pull.empty()
34-
}
35-
return pull(
36-
ipldResolver.getStream(new CID(item.hash)),
37-
pull.map((result) => result.value),
38-
pull.map((node) => switchType(
39-
node,
40-
() => dirExporter(node, item.path, ipldResolver),
41-
() => fileExporter(node, item.path, ipldResolver)
42-
)),
43-
pull.flatten()
44-
)
45-
}
4616

47-
// Traverse the DAG
4817
return pull(
4918
ipldResolver.getStream(new CID(hash)),
5019
pull.map((result) => result.value),
51-
pull.map((node) => switchType(
52-
node,
53-
() => traverse.widthFirst({path: hash, hash}, visitor),
54-
() => fileExporter(node, hash, ipldResolver)
55-
)),
20+
pull.map((node) => resolve(node, hash, ipldResolver)),
5621
pull.flatten()
5722
)
5823
}

src/exporter/resolve.js

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
'use strict'
2+
3+
const UnixFS = require('ipfs-unixfs')
4+
const pull = require('pull-stream')
5+
6+
const resolvers = {
7+
directory: require('./dir-flat'),
8+
'hamt-sharded-directory': require('./dir-hamt-sharded'),
9+
file: require('./file')
10+
}
11+
12+
module.exports = Object.assign({
13+
resolve: resolve,
14+
typeOf: typeOf
15+
}, resolvers)
16+
17+
function resolve (node, name, ipldResolver, parentNode) {
18+
const type = typeOf(node)
19+
const resolver = resolvers[type]
20+
if (!resolver) {
21+
return pull.error(new Error('Unkown node type ' + type))
22+
}
23+
let stream = resolver(node, name, ipldResolver, resolve, parentNode)
24+
return stream
25+
}
26+
27+
function typeOf (node) {
28+
const data = UnixFS.unmarshal(node.data)
29+
return data.type
30+
}

0 commit comments

Comments
 (0)