forked from dgraph-io/dgraph
-
Notifications
You must be signed in to change notification settings - Fork 0
/
reduce.go
97 lines (85 loc) · 2.11 KB
/
reduce.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
package main
import (
"bytes"
"sync/atomic"
"github.com/dgraph-io/badger"
"github.com/dgraph-io/dgraph/bp128"
"github.com/dgraph-io/dgraph/protos"
"github.com/dgraph-io/dgraph/x"
)
// reducer consumes shuffle outputs and writes the posting lists built from
// them to badger.
type reducer struct {
// Embedded shared loader state; the methods in this file read r.opt and
// update the r.prog counters through it.
*state
// input delivers the shuffle outputs that run ranges over until the channel
// is closed.
input <-chan shuffleOutput
// writesThr tracks outstanding badger batch writes: run calls Start once per
// job and Wait before returning, and reduce calls Done once the job's write
// has finished.
writesThr *x.Throttle
}
// run drains r.input, handing every shuffle output to reduce in its own
// goroutine. A local throttle created with r.opt.NumGoroutines is started
// before each goroutine is launched and marked done when reduce returns
// (presumably capping how many reducers run at once; see x.Throttle).
// Before returning, run waits on both that throttle and r.writesThr, whose
// slots are released by the badger write callback in reduce.
func (r *reducer) run() {
	reducerThr := x.NewThrottle(r.opt.NumGoroutines)
	for out := range r.input {
		// Claim a reducer slot first, then move the job from the "queued"
		// metric to the "running" one.
		reducerThr.Start()
		NumReducers.Add(1)
		NumQueuedReduceJobs.Add(-1)

		// Claim a write slot up front; reduce gives it back once the batch
		// write for this job has completed.
		r.writesThr.Start()

		// Take a per-iteration copy so the goroutine never observes a later
		// value of the loop variable.
		job := out
		go func() {
			r.reduce(job)
			reducerThr.Done()
			NumReducers.Add(-1)
		}()
	}

	reducerThr.Wait()
	r.writesThr.Wait()
}
// reduce turns one shuffle output into badger entries and queues them for an
// asynchronous batch write on job.kv.
//
// It walks job.mapEntries in order, accumulating UIDs (and postings, when an
// entry carries one) for the current key. One badger entry is emitted each
// time the key changes, plus one for the final key. A UID equal to the one
// accumulated immediately before it under the same key is skipped. Grouping
// and de-duplication therefore rely on equal keys (and equal UIDs within a
// key) being adjacent in job.mapEntries; presumably the shuffler delivers them
// sorted, to be confirmed against the producer of shuffleOutput.
//
// The caller must already hold a slot on r.writesThr (run acquires it before
// starting reduce). reduce releases that slot exactly once: from the write
// callback after all entries were checked, or immediately when the job is
// empty and there is nothing to write.
func (r *reducer) reduce(job shuffleOutput) {
	if len(job.mapEntries) == 0 {
		// Without this guard the trailing outputPostingList call below would
		// emit an entry with a nil key and count a key that does not exist.
		// No write is issued, so give back the write slot here.
		r.writesThr.Done()
		return
	}

	var currentKey []byte
	var uids []uint64
	pl := new(protos.PostingList)
	var entries []*badger.Entry

	// outputPostingList converts the UIDs and postings accumulated for
	// currentKey into a badger entry, then resets the accumulators so they can
	// be reused for the next key.
	outputPostingList := func() {
		atomic.AddInt64(&r.prog.reduceKeyCount, 1)

		// For a UID-only posting list, the badger value is a delta packed UID
		// list. The UserMeta indicates to treat the value as a delta packed
		// list when the value is read by dgraph. For a value posting list,
		// the full protos.Posting type is used (which internally contains the
		// delta packed UID list).
		e := &badger.Entry{Key: currentKey}
		if len(pl.Postings) == 0 {
			e.Value = bp128.DeltaPack(uids)
			e.UserMeta = 0x01
		} else {
			pl.Uids = bp128.DeltaPack(uids)
			var err error
			e.Value, err = pl.Marshal()
			x.Check(err)
		}
		entries = append(entries, e)

		uids = uids[:0]
		pl.Reset()
	}

	for _, mapEntry := range job.mapEntries {
		atomic.AddInt64(&r.prog.reduceEdgeCount, 1)

		// A key change closes the posting list built so far. currentKey is
		// nil only before the first entry, when there is nothing to flush.
		if currentKey != nil && !bytes.Equal(mapEntry.Key, currentKey) {
			outputPostingList()
		}
		currentKey = mapEntry.Key

		// When the entry carries a full posting, its UID takes precedence
		// over the bare UID on the map entry.
		uid := mapEntry.Uid
		if mapEntry.Posting != nil {
			uid = mapEntry.Posting.Uid
		}

		// Skip a UID that repeats the previous one for this key.
		if len(uids) > 0 && uids[len(uids)-1] == uid {
			continue
		}
		uids = append(uids, uid)
		if mapEntry.Posting != nil {
			pl.Postings = append(pl.Postings, mapEntry.Posting)
		}
	}
	// Flush the posting list for the last key.
	outputPostingList()

	NumBadgerWrites.Add(1)
	job.kv.BatchSetAsync(entries, func(err error) {
		// Check both the batch-level error and each entry's own error.
		x.Check(err)
		for _, e := range entries {
			x.Check(e.Error)
		}
		NumBadgerWrites.Add(-1)
		r.writesThr.Done()
	})
}