Skip to content
Permalink
master
Switch branches/tags
Go to file
 
 
Cannot retrieve contributors at this time
// Package balanced provides methods to build balanced DAGs, which are generalistic
// DAGs in which all leaves (nodes representing chunks of data) are at the same
// distance from the root. Nodes can have only a maximum number of children; to be
// able to store more leaf data nodes balanced DAGs are extended by increasing its
// depth (and having more intermediary nodes).
//
// Internal nodes are always represented by UnixFS nodes (of type `File`) encoded
// inside DAG nodes (see the `go-unixfs` package for details of UnixFS). In
// contrast, leaf nodes with data have multiple possible representations: UnixFS
// nodes as above, raw nodes with just the file data (no format) and Filestore
// nodes (that directly link to the file on disk using a format stored on a raw
// node, see the `go-ipfs/filestore` package for details of Filestore.)
//
// In the case the entire file fits into just one node it will be formatted as a
// (single) leaf node (without parent) with the possible representations already
// mentioned. This is the only scenario where the root can be of a type different
// that the UnixFS node.
//
// Notes:
//
// 1. In the implementation. `FSNodeOverDag` structure is used for representing
// the UnixFS node encoded inside the DAG node.
// (see https://github.com/ipfs/go-ipfs/pull/5118.)
//
// 2. `TFile` is used for backwards-compatibility. It was a bug causing the leaf
// nodes to be generated with this type instead of `TRaw`. The former one
// should be used (like the trickle builder does).
// (See https://github.com/ipfs/go-ipfs/pull/5120.)
//
// +-------------+
// | Root 4 |
// +-------------+
// |
// +--------------------------+----------------------------+
// | |
// +-------------+ +-------------+
// | Node 2 | | Node 5 |
// +-------------+ +-------------+
// | |
// +-------------+-------------+ +-------------+
// | | |
// +-------------+ +-------------+ +-------------+
// | Node 1 | | Node 3 | | Node 6 |
// +-------------+ +-------------+ +-------------+
// | | |
// +------+------+ +------+------+ +------+
// | | | | |
// +=========+ +=========+ +=========+ +=========+ +=========+
// | Chunk 1 | | Chunk 2 | | Chunk 3 | | Chunk 4 | | Chunk 5 |
// +=========+ +=========+ +=========+ +=========+ +=========+
package balanced
import (
"errors"
ft "github.com/ipfs/go-unixfs"
h "github.com/ipfs/go-unixfs/importer/helpers"
ipld "github.com/ipfs/go-ipld-format"
)
// Layout builds a balanced DAG layout. In a balanced DAG of depth 1, leaf nodes
// with data are added to a single `root` until the maximum number of links is
// reached. Then, to continue adding more data leaf nodes, a `newRoot` is created
// pointing to the old `root` (which will now become and intermediary node),
// increasing the depth of the DAG to 2. This will increase the maximum number of
// data leaf nodes the DAG can have (`Maxlinks() ^ depth`). The `fillNodeRec`
// function will add more intermediary child nodes to `newRoot` (which already has
// `root` as child) that in turn will have leaf nodes with data added to them.
// After that process is completed (the maximum number of links is reached),
// `fillNodeRec` will return and the loop will be repeated: the `newRoot` created
// will become the old `root` and a new root will be created again to increase the
// depth of the DAG. The process is repeated until there is no more data to add
// (i.e. the DagBuilderHelper’s Done() function returns true).
//
// The nodes are filled recursively, so the DAG is built from the bottom up. Leaf
// nodes are created first using the chunked file data and its size. The size is
// then bubbled up to the parent (internal) node, which aggregates all the sizes of
// its children and bubbles that combined size up to its parent, and so on up to
// the root. This way, a balanced DAG acts like a B-tree when seeking to a byte
// offset in the file the graph represents: each internal node uses the file size
// of its children as an index when seeking.
//
// `Layout` creates a root and hands it off to be filled:
//
// +-------------+
// | Root 1 |
// +-------------+
// |
// ( fillNodeRec fills in the )
// ( chunks on the root. )
// |
// +------+------+
// | |
// + - - - - + + - - - - +
// | Chunk 1 | | Chunk 2 |
// + - - - - + + - - - - +
//
// ↓
// When the root is full but there's more data...
// ↓
//
// +-------------+
// | Root 1 |
// +-------------+
// |
// +------+------+
// | |
// +=========+ +=========+ + - - - - +
// | Chunk 1 | | Chunk 2 | | Chunk 3 |
// +=========+ +=========+ + - - - - +
//
// ↓
// ...Layout's job is to create a new root.
// ↓
//
// +-------------+
// | Root 2 |
// +-------------+
// |
// +-------------+ - - - - - - - - +
// | |
// +-------------+ ( fillNodeRec creates the )
// | Node 1 | ( branch that connects )
// +-------------+ ( "Root 2" to "Chunk 3." )
// | |
// +------+------+ + - - - - -+
// | | |
// +=========+ +=========+ + - - - - +
// | Chunk 1 | | Chunk 2 | | Chunk 3 |
// +=========+ +=========+ + - - - - +
func Layout(db *h.DagBuilderHelper) (ipld.Node, error) {
if db.Done() {
// No data, return just an empty node.
root, err := db.NewLeafNode(nil, ft.TFile)
if err != nil {
return nil, err
}
// This works without Filestore support (`ProcessFileStore`).
// TODO: Why? Is there a test case missing?
return root, db.Add(root)
}
// The first `root` will be a single leaf node with data
// (corner case), after that subsequent `root` nodes will
// always be internal nodes (with a depth > 0) that can
// be handled by the loop.
root, fileSize, err := db.NewLeafDataNode(ft.TFile)
if err != nil {
return nil, err
}
// Each time a DAG of a certain `depth` is filled (because it
// has reached its maximum capacity of `db.Maxlinks()` per node)
// extend it by making it a sub-DAG of a bigger DAG with `depth+1`.
for depth := 1; !db.Done(); depth++ {
// Add the old `root` as a child of the `newRoot`.
newRoot := db.NewFSNodeOverDag(ft.TFile)
newRoot.AddChild(root, fileSize, db)
// Fill the `newRoot` (that has the old `root` already as child)
// and make it the current `root` for the next iteration (when
// it will become "old").
root, fileSize, err = fillNodeRec(db, newRoot, depth)
if err != nil {
return nil, err
}
}
return root, db.Add(root)
}
// fillNodeRec will "fill" the given internal (non-leaf) `node` with data by
// adding child nodes to it, either leaf data nodes (if `depth` is 1) or more
// internal nodes with higher depth (and calling itself recursively on them
// until *they* are filled with data). The data to fill the node with is
// provided by DagBuilderHelper.
//
// `node` represents a (sub-)DAG root that is being filled. If called recursively,
// it is `nil`, a new node is created. If it has been called from `Layout` (see
// diagram below) it points to the new root (that increases the depth of the DAG),
// it already has a child (the old root). New children will be added to this new
// root, and those children will in turn be filled (calling `fillNodeRec`
// recursively).
//
// +-------------+
// | `node` |
// | (new root) |
// +-------------+
// |
// +-------------+ - - - - - - + - - - - - - - - - - - +
// | | |
// +--------------+ + - - - - - + + - - - - - +
// | (old root) | | new child | | |
// +--------------+ + - - - - - + + - - - - - +
// | | |
// +------+------+ + - - + - - - +
// | | | |
// +=========+ +=========+ + - - - - + + - - - - +
// | Chunk 1 | | Chunk 2 | | Chunk 3 | | Chunk 4 |
// +=========+ +=========+ + - - - - + + - - - - +
//
// The `node` to be filled uses the `FSNodeOverDag` abstraction that allows adding
// child nodes without packing/unpacking the UnixFS layer node (having an internal
// `ft.FSNode` cache).
//
// It returns the `ipld.Node` representation of the passed `node` filled with
// children and the `nodeFileSize` with the total size of the file chunk (leaf)
// nodes stored under this node (parent nodes store this to enable efficient
// seeking through the DAG when reading data later).
//
// warning: **children** pinned indirectly, but input node IS NOT pinned.
func fillNodeRec(db *h.DagBuilderHelper, node *h.FSNodeOverDag, depth int) (filledNode ipld.Node, nodeFileSize uint64, err error) {
if depth < 1 {
return nil, 0, errors.New("attempt to fillNode at depth < 1")
}
if node == nil {
node = db.NewFSNodeOverDag(ft.TFile)
}
// Child node created on every iteration to add to parent `node`.
// It can be a leaf node or another internal node.
var childNode ipld.Node
// File size from the child node needed to update the `FSNode`
// in `node` when adding the child.
var childFileSize uint64
// While we have room and there is data available to be added.
for node.NumChildren() < db.Maxlinks() && !db.Done() {
if depth == 1 {
// Base case: add leaf node with data.
childNode, childFileSize, err = db.NewLeafDataNode(ft.TFile)
if err != nil {
return nil, 0, err
}
} else {
// Recursion case: create an internal node to in turn keep
// descending in the DAG and adding child nodes to it.
childNode, childFileSize, err = fillNodeRec(db, nil, depth-1)
if err != nil {
return nil, 0, err
}
}
err = node.AddChild(childNode, childFileSize, db)
if err != nil {
return nil, 0, err
}
}
nodeFileSize = node.FileSize()
// Get the final `dag.ProtoNode` with the `FSNode` data encoded inside.
filledNode, err = node.Commit()
if err != nil {
return nil, 0, err
}
return filledNode, nodeFileSize, nil
}