Skip to content

Commit

Permalink
Merge pull request ipfs/go-merkledag#50 from MichaelMure/dagutils
Browse files Browse the repository at this point in the history
Migrate dagutils from go-ipfs

This commit was moved from ipfs/go-merkledag@cfc27e8
  • Loading branch information
Stebalien committed Dec 2, 2019
2 parents b6d5034 + 502aa44 commit bfb91f3
Show file tree
Hide file tree
Showing 5 changed files with 910 additions and 0 deletions.
214 changes: 214 additions & 0 deletions ipld/merkledag/dagutils/diff.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
package dagutils

import (
"context"
"fmt"
"path"

"github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"

dag "github.com/ipfs/go-merkledag"
)

// ChangeType denotes type of change in Change
type ChangeType int

// These constants define the changes that can be applied to a DAG.
const (
Add ChangeType = iota
Remove
Mod
)

// Change represents a change to a DAG and contains a reference to the old and
// new CIDs.
type Change struct {
Type ChangeType
Path string
Before cid.Cid
After cid.Cid
}

// String prints a human-friendly line about a change.
func (c *Change) String() string {
switch c.Type {
case Add:
return fmt.Sprintf("Added %s at %s", c.After.String(), c.Path)
case Remove:
return fmt.Sprintf("Removed %s from %s", c.Before.String(), c.Path)
case Mod:
return fmt.Sprintf("Changed %s to %s at %s", c.Before.String(), c.After.String(), c.Path)
default:
panic("nope")
}
}

// ApplyChange applies the requested changes to the given node in the given dag.
func ApplyChange(ctx context.Context, ds ipld.DAGService, nd *dag.ProtoNode, cs []*Change) (*dag.ProtoNode, error) {
e := NewDagEditor(nd, ds)
for _, c := range cs {
switch c.Type {
case Add:
child, err := ds.Get(ctx, c.After)
if err != nil {
return nil, err
}

childpb, ok := child.(*dag.ProtoNode)
if !ok {
return nil, dag.ErrNotProtobuf
}

err = e.InsertNodeAtPath(ctx, c.Path, childpb, nil)
if err != nil {
return nil, err
}

case Remove:
err := e.RmLink(ctx, c.Path)
if err != nil {
return nil, err
}

case Mod:
err := e.RmLink(ctx, c.Path)
if err != nil {
return nil, err
}
child, err := ds.Get(ctx, c.After)
if err != nil {
return nil, err
}

childpb, ok := child.(*dag.ProtoNode)
if !ok {
return nil, dag.ErrNotProtobuf
}

err = e.InsertNodeAtPath(ctx, c.Path, childpb, nil)
if err != nil {
return nil, err
}
}
}

return e.Finalize(ctx, ds)
}

// Diff returns a set of changes that transform node 'a' into node 'b'.
// It only traverses links in the following cases:
// 1. two node's links number are greater than 0.
// 2. both of two nodes are ProtoNode.
// Otherwise, it compares the cid and emits a Mod change object.
func Diff(ctx context.Context, ds ipld.DAGService, a, b ipld.Node) ([]*Change, error) {
// Base case where both nodes are leaves, just compare
// their CIDs.
if len(a.Links()) == 0 && len(b.Links()) == 0 {
return getChange(a, b)
}

var out []*Change
cleanA, okA := a.Copy().(*dag.ProtoNode)
cleanB, okB := b.Copy().(*dag.ProtoNode)
if !okA || !okB {
return getChange(a, b)
}

// strip out unchanged stuff
for _, lnk := range a.Links() {
l, _, err := b.ResolveLink([]string{lnk.Name})
if err == nil {
if l.Cid.Equals(lnk.Cid) {
// no change... ignore it
} else {
anode, err := lnk.GetNode(ctx, ds)
if err != nil {
return nil, err
}

bnode, err := l.GetNode(ctx, ds)
if err != nil {
return nil, err
}

sub, err := Diff(ctx, ds, anode, bnode)
if err != nil {
return nil, err
}

for _, subc := range sub {
subc.Path = path.Join(lnk.Name, subc.Path)
out = append(out, subc)
}
}
_ = cleanA.RemoveNodeLink(l.Name)
_ = cleanB.RemoveNodeLink(l.Name)
}
}

for _, lnk := range cleanA.Links() {
out = append(out, &Change{
Type: Remove,
Path: lnk.Name,
Before: lnk.Cid,
})
}
for _, lnk := range cleanB.Links() {
out = append(out, &Change{
Type: Add,
Path: lnk.Name,
After: lnk.Cid,
})
}

return out, nil
}

// Conflict represents two incompatible changes and is returned by MergeDiffs().
type Conflict struct {
A *Change
B *Change
}

// MergeDiffs takes two slice of changes and adds them to a single slice.
// When a Change from b happens to the same path of an existing change in a,
// a conflict is created and b is not added to the merged slice.
// A slice of Conflicts is returned and contains pointers to the
// Changes involved (which share the same path).
func MergeDiffs(a, b []*Change) ([]*Change, []Conflict) {
var out []*Change
var conflicts []Conflict
paths := make(map[string]*Change)
for _, c := range a {
paths[c.Path] = c
}

for _, c := range b {
if ca, ok := paths[c.Path]; ok {
conflicts = append(conflicts, Conflict{
A: ca,
B: c,
})
} else {
out = append(out, c)
}
}
for _, c := range paths {
out = append(out, c)
}
return out, conflicts
}

func getChange(a, b ipld.Node) ([]*Change, error) {
if a.Cid().Equals(b.Cid()) {
return []*Change{}, nil
}
return []*Change{
{
Type: Mod,
Before: a.Cid(),
After: b.Cid(),
},
}, nil
}
99 changes: 99 additions & 0 deletions ipld/merkledag/dagutils/diffenum.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,99 @@
package dagutils

import (
"context"
"fmt"

cid "github.com/ipfs/go-cid"
ipld "github.com/ipfs/go-ipld-format"

mdag "github.com/ipfs/go-merkledag"
)

// DiffEnumerate fetches every object in the graph pointed to by 'to' that is
// not in 'from'. This can be used to more efficiently fetch a graph if you can
// guarantee you already have the entirety of 'from'
func DiffEnumerate(ctx context.Context, dserv ipld.NodeGetter, from, to cid.Cid) error {
fnd, err := dserv.Get(ctx, from)
if err != nil {
return fmt.Errorf("get %s: %s", from, err)
}

tnd, err := dserv.Get(ctx, to)
if err != nil {
return fmt.Errorf("get %s: %s", to, err)
}

diff := getLinkDiff(fnd, tnd)

sset := cid.NewSet()
for _, c := range diff {
// Since we're already assuming we have everything in the 'from' graph,
// add all those cids to our 'already seen' set to avoid potentially
// enumerating them later
if c.bef.Defined() {
sset.Add(c.bef)
}
}
for _, c := range diff {
if !c.bef.Defined() {
if sset.Has(c.aft) {
continue
}
err := mdag.Walk(ctx, mdag.GetLinksDirect(dserv), c.aft, sset.Visit, mdag.Concurrent())
if err != nil {
return err
}
} else {
err := DiffEnumerate(ctx, dserv, c.bef, c.aft)
if err != nil {
return err
}
}
}

return nil
}

// if both bef and aft are not nil, then that signifies bef was replaces with aft.
// if bef is nil and aft is not, that means aft was newly added
// if aft is nil and bef is not, that means bef was deleted
type diffpair struct {
bef, aft cid.Cid
}

// getLinkDiff returns a changeset between nodes 'a' and 'b'. Currently does
// not log deletions as our usecase doesnt call for this.
func getLinkDiff(a, b ipld.Node) []diffpair {
ina := make(map[string]*ipld.Link)
inb := make(map[string]*ipld.Link)
var aonly []cid.Cid
for _, l := range b.Links() {
inb[l.Cid.KeyString()] = l
}
for _, l := range a.Links() {
var key = l.Cid.KeyString()
ina[key] = l
if inb[key] == nil {
aonly = append(aonly, l.Cid)
}
}

var out []diffpair
var aindex int

for _, l := range b.Links() {
if ina[l.Cid.KeyString()] != nil {
continue
}

if aindex < len(aonly) {
out = append(out, diffpair{bef: aonly[aindex], aft: l.Cid})
aindex++
} else {
out = append(out, diffpair{aft: l.Cid})
continue
}
}
return out
}

0 comments on commit bfb91f3

Please sign in to comment.