Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(sroar): Bring sroar to Dgraph #7840

Merged
merged 11 commits into from May 22, 2021
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
79 changes: 25 additions & 54 deletions codec/codec.go
Expand Up @@ -22,8 +22,7 @@ import (
"github.com/dgraph-io/dgraph/protos/pb"
"github.com/dgraph-io/dgraph/x"
"github.com/dgraph-io/ristretto/z"
"github.com/dgraph-io/roaring/roaring64"
"github.com/pkg/errors"
"github.com/dgraph-io/sroar"
)

type seekPos int
Expand All @@ -44,44 +43,29 @@ func ApproxLen(bitmap []byte) int {
return 0
}

// Encode takes in a list of uids and a block size. It would pack these uids into blocks of the
// given size, with the last block having fewer uids. Within each block, it stores the first uid as
// base. For each next uid, a delta = uids[i] - uids[i-1] is stored. Protobuf uses Varint encoding,
// as mentioned here: https://developers.google.com/protocol-buffers/docs/encoding . This ensures
// that the deltas being considerably smaller than the original uids are nicely packed in fewer
// bytes. Our benchmarks on artificial data show compressed size to be 13% of the original. This
// mechanism is a LOT simpler to understand and if needed, debug.
func Encode(uids []uint64) []byte {
r := roaring64.New()
r.AddMany(uids)
b, err := r.ToBytes()
x.Check(err)
return b
}

func ToList(rm *roaring64.Bitmap) *pb.List {
func ToList(rm *sroar.Bitmap) *pb.List {
return &pb.List{
Uids: rm.ToArray(),
// Bitmap: ToBytes(rm),
}
}

func And(rm *roaring64.Bitmap, l *pb.List) {
func And(rm *sroar.Bitmap, l *pb.List) {
rl := FromList(l)
rm.And(rl)
}

func MatrixToBitmap(matrix []*pb.List) *roaring64.Bitmap {
res := roaring64.New()
func MatrixToBitmap(matrix []*pb.List) *sroar.Bitmap {
res := sroar.NewBitmap()
for _, l := range matrix {
r := FromList(l)
res.Or(r)
}
return res
}

func Intersect(matrix []*pb.List) *roaring64.Bitmap {
out := roaring64.New()
func Intersect(matrix []*pb.List) *sroar.Bitmap {
out := sroar.NewBitmap()
if len(matrix) == 0 {
return out
}
Expand All @@ -93,8 +77,8 @@ func Intersect(matrix []*pb.List) *roaring64.Bitmap {
return out
}

func Merge(matrix []*pb.List) *roaring64.Bitmap {
out := roaring64.New()
func Merge(matrix []*pb.List) *sroar.Bitmap {
out := sroar.NewBitmap()
if len(matrix) == 0 {
return out
}
Expand All @@ -106,51 +90,39 @@ func Merge(matrix []*pb.List) *roaring64.Bitmap {
return out
}

func ToBytes(bm *roaring64.Bitmap) []byte {
func ToBytes(bm *sroar.Bitmap) []byte {
if bm.IsEmpty() {
return nil
}
b, err := bm.ToBytes()
x.Check(err)
return b
return bm.ToBuffer()
}

func FromPostingList(r *roaring64.Bitmap, pl *pb.PostingList) error {
if len(pl.Bitmap) == 0 {
return nil
}
if err := r.UnmarshalBinary(pl.Bitmap); err != nil {
return errors.Wrapf(err, "codec.FromPostingList")
}
return nil
}

func FromList(l *pb.List) *roaring64.Bitmap {
iw := roaring64.New()
func FromList(l *pb.List) *sroar.Bitmap {
iw := sroar.NewBitmap()
if l == nil {
return iw
}

if len(l.BitmapDoNotUse) > 0 {
// Only one of Uids or Bitmap should be defined.
x.Check(iw.UnmarshalBinary(l.BitmapDoNotUse))
iw = sroar.FromBuffer(l.BitmapDoNotUse)
}
if len(l.Uids) > 0 {
iw.AddMany(l.Uids)
iw.SetMany(l.Uids)
}
return iw
}

func FromBytes(buf []byte) *roaring64.Bitmap {
r := roaring64.New()
func FromBytes(buf []byte) *sroar.Bitmap {
r := sroar.NewBitmap()
if buf == nil || len(buf) == 0 {
return r
}
x.Check(r.UnmarshalBinary(buf))
return r
return sroar.FromBuffer(buf)
}

func FromBackup(buf []byte) *roaring64.Bitmap {
r := roaring64.New()
func FromBackup(buf []byte) *sroar.Bitmap {
r := sroar.NewBitmap()
var prev uint64
for len(buf) > 0 {
uid, n := binary.Uvarint(buf)
Expand All @@ -160,28 +132,27 @@ func FromBackup(buf []byte) *roaring64.Bitmap {
buf = buf[n:]

next := prev + uid
r.Add(next)
r.Set(next)
prev = next
}
return r
}

func ToUids(plist *pb.PostingList, start uint64) []uint64 {
r := roaring64.New()
x.Check(FromPostingList(r, plist))
r := sroar.FromBuffer(plist.Bitmap)
r.RemoveRange(0, start)
return r.ToArray()
}

// RemoveRange would remove [from, to] from bm.
func RemoveRange(bm *roaring64.Bitmap, from, to uint64) {
func RemoveRange(bm *sroar.Bitmap, from, to uint64) {
bm.RemoveRange(from, to)
bm.Remove(to)
}

// DecodeToBuffer is the same as Decode but it returns a z.Buffer which is
// calloc'ed and SHOULD be freed up by calling buffer.Release().
func DecodeToBuffer(buf *z.Buffer, bm *roaring64.Bitmap) {
func DecodeToBuffer(buf *z.Buffer, bm *sroar.Bitmap) {
var last uint64
tmp := make([]byte, 16)
itr := bm.ManyIterator()
Expand Down
8 changes: 4 additions & 4 deletions dgraph/cmd/bulk/count_index.go
Expand Up @@ -29,7 +29,7 @@ import (
"github.com/dgraph-io/dgraph/protos/pb"
"github.com/dgraph-io/dgraph/x"
"github.com/dgraph-io/ristretto/z"
"github.com/dgraph-io/roaring/roaring64"
"github.com/dgraph-io/sroar"
)

// type countEntry struct {
Expand Down Expand Up @@ -137,7 +137,7 @@ func (c *countIndexer) writeIndex(buf *z.Buffer) {
defer alloc.Release()

var pl pb.PostingList
bm := roaring64.New()
bm := sroar.NewBitmap()

outBuf := z.NewBuffer(5<<20, "CountIndexer.Buffer.WriteIndex")
defer outBuf.Release()
Expand All @@ -155,7 +155,7 @@ func (c *countIndexer) writeIndex(buf *z.Buffer) {
badger.KVToBuffer(kv, outBuf)

alloc.Reset()
bm = roaring64.New()
bm = sroar.NewBitmap()
pl.Reset()

// flush out the buffer.
Expand All @@ -170,7 +170,7 @@ func (c *countIndexer) writeIndex(buf *z.Buffer) {
if !bytes.Equal(lastCe.Key(), ce.Key()) {
encode()
}
bm.Add(ce.Uid())
bm.Set(ce.Uid())
lastCe = ce
return nil
})
Expand Down
10 changes: 4 additions & 6 deletions dgraph/cmd/bulk/reduce.go
Expand Up @@ -43,7 +43,7 @@ import (
"github.com/dgraph-io/dgraph/protos/pb"
"github.com/dgraph-io/dgraph/x"
"github.com/dgraph-io/ristretto/z"
"github.com/dgraph-io/roaring/roaring64"
"github.com/dgraph-io/sroar"
"github.com/dustin/go-humanize"
"github.com/golang/snappy"
)
Expand Down Expand Up @@ -592,7 +592,7 @@ func (r *reducer) toList(req *encodeRequest) {
}
}

bm := roaring64.New()
bm := sroar.NewBitmap()
var lastUid uint64
slice, next := []byte{}, start
for next >= 0 && (next < end || end == -1) {
Expand All @@ -605,7 +605,7 @@ func (r *reducer) toList(req *encodeRequest) {
}
lastUid = uid

bm.Add(uid)
bm.Set(uid)
if pbuf := me.Plist(); len(pbuf) > 0 {
p := getPosting()
x.Check(p.Unmarshal(pbuf))
Expand Down Expand Up @@ -647,9 +647,7 @@ func (r *reducer) toList(req *encodeRequest) {
}
}

shouldSplit, err := posting.ShouldSplit(pl)
x.Check(err)
if shouldSplit {
if posting.ShouldSplit(pl) {
// Give ownership of pl.Pack away to list. Rollup would deallocate the Pack.
l := posting.NewList(y.Copy(currentKey), pl, writeVersionTs)
kvs, err := l.Rollup(nil)
Expand Down
1 change: 1 addition & 0 deletions dgraph/cmd/debug/run.go
Expand Up @@ -418,6 +418,7 @@ func history(lookup []byte, itr *badger.Iterator) {
r := codec.FromBytes(plist.Bitmap)
fmt.Fprintf(&buf, " Num uids = %d. Size = %d\n",
r.GetCardinality(), len(plist.Bitmap))

itr := r.ManyIterator()
uids := make([]uint64, 256)
for {
Expand Down
6 changes: 4 additions & 2 deletions go.mod
Expand Up @@ -6,6 +6,8 @@ go 1.12
// replace github.com/dgraph-io/ristretto => /home/mrjn/go/src/github.com/dgraph-io/ristretto
// replace github.com/dgraph-io/roaring => /home/mrjn/go/src/github.com/dgraph-io/roaring

// replace github.com/dgraph-io/sroar => /home/ash/go/src/github.com/dgraph-io/sroar

require (
contrib.go.opencensus.io/exporter/jaeger v0.1.0
contrib.go.opencensus.io/exporter/prometheus v0.1.0
Expand All @@ -23,8 +25,8 @@ require (
github.com/dgraph-io/gqlparser/v2 v2.2.0
github.com/dgraph-io/graphql-transport-ws v0.0.0-20210511143556-2cef522f1f15
github.com/dgraph-io/ristretto v0.0.4-0.20210504190834-0bf2acd73aa3
github.com/dgraph-io/roaring v0.5.6-0.20210227175938-766b897233a5
github.com/dgraph-io/simdjson-go v0.3.0
github.com/dgraph-io/sroar v0.0.0-20210520141110-25dc00d9dab2
github.com/dgrijalva/jwt-go v3.2.0+incompatible
github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1
github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13
Expand Down Expand Up @@ -59,7 +61,7 @@ require (
github.com/spf13/cobra v0.0.5
github.com/spf13/pflag v1.0.3
github.com/spf13/viper v1.7.1
github.com/stretchr/testify v1.6.1
github.com/stretchr/testify v1.7.0
github.com/tinylib/msgp v1.1.5 // indirect
github.com/twpayne/go-geom v1.0.5
github.com/xdg/scram v0.0.0-20180814205039-7eeb5667e42c
Expand Down
15 changes: 8 additions & 7 deletions go.sum
Expand Up @@ -41,8 +41,8 @@ github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAE
github.com/OneOfOne/xxhash v1.2.5 h1:zl/OfRA6nftbBK9qTohYBJ5xvw6C/oNKizR7cZGl3cI=
github.com/OneOfOne/xxhash v1.2.5/go.mod h1:eZbhyaAYD41SGSSsnmcpxVoRiQ/MPUTjUdIIOT9Um7Q=
github.com/RoaringBitmap/roaring v0.4.23/go.mod h1:D0gp8kJQgE1A4LQ5wFLggQEyvDi06Mq5mKs52e1TwOo=
github.com/RoaringBitmap/roaring v0.5.5 h1:naNqvO1mNnghk2UvcsqnzHDBn9DRbCIRy94GmDTRVTQ=
github.com/RoaringBitmap/roaring v0.5.5/go.mod h1:puNo5VdzwbaIQxSiDIwfXl4Hnc+fbovcX4IW/dSTtUk=
github.com/RoaringBitmap/roaring v0.6.1 h1:O36Tdaj1Fi/zyr25shTHwlQPGdq53+u4WkM08AOEjiE=
github.com/RoaringBitmap/roaring v0.6.1/go.mod h1:WZ83fjBF/7uBHi6QoFyfGL4+xuV4Qn+xFkm4+vSzrhE=
github.com/Shopify/goreferrer v0.0.0-20181106222321-ec9c9a553398/go.mod h1:a1uqRtAwp2Xwc6WNPJEufxJ7fx3npB4UV/JOLmbu5I0=
github.com/Shopify/sarama v1.19.0/go.mod h1:FVkBWblsNy7DGZRfXLU0O9RCGt5g3g3yEuWXgklEdEo=
github.com/Shopify/sarama v1.27.2 h1:1EyY1dsxNDUQEv0O/4TsjosHI2CgB1uo9H/v56xzTxc=
Expand Down Expand Up @@ -133,10 +133,12 @@ github.com/dgraph-io/graphql-transport-ws v0.0.0-20210511143556-2cef522f1f15 h1:
github.com/dgraph-io/graphql-transport-ws v0.0.0-20210511143556-2cef522f1f15/go.mod h1:7z3c/5w0sMYYZF5bHsrh8IH4fKwG5O5Y70cPH1ZLLRQ=
github.com/dgraph-io/ristretto v0.0.4-0.20210504190834-0bf2acd73aa3 h1:jU/wpYsEL+8JPLf/QcjkQKI5g0dOjSuwcMjkThxt5x0=
github.com/dgraph-io/ristretto v0.0.4-0.20210504190834-0bf2acd73aa3/go.mod h1:fux0lOrBhrVCJd3lcTHsIJhq1T2rokOu6v9Vcb3Q9ug=
github.com/dgraph-io/roaring v0.5.6-0.20210227175938-766b897233a5 h1:9t3OKcvsQlxU9Cu0U55tgvNtaRYVGDr6rUb95P8cSbg=
github.com/dgraph-io/roaring v0.5.6-0.20210227175938-766b897233a5/go.mod h1:I8kxPBtSQW3OdQFWonumQdCx2DTmq2WjdnTjGXz3uTM=
github.com/dgraph-io/simdjson-go v0.3.0 h1:h71LO7vR4LHMPUhuoGN8bqGm1VNfGOlAG8BI6iDUKw0=
github.com/dgraph-io/simdjson-go v0.3.0/go.mod h1:Otpysdjaxj9OGaJusn4pgQV7OFh2bELuHANq0I78uvY=
github.com/dgraph-io/sroar v0.0.0-20210520133203-eb40a693196b h1:bkYNaUEk9m+jRlgIkPUHH+z2mL/z8U6pov7kRrSio8c=
github.com/dgraph-io/sroar v0.0.0-20210520133203-eb40a693196b/go.mod h1:bdNPtQmcxoIQVkZEWZvX0n0/IDlHFab397xdBlP4OoE=
github.com/dgraph-io/sroar v0.0.0-20210520141110-25dc00d9dab2 h1:AMUuWNbUz57/CTxy3ZYhk+JEqiOmhzJV2z2pGOABY5o=
github.com/dgraph-io/sroar v0.0.0-20210520141110-25dc00d9dab2/go.mod h1:bdNPtQmcxoIQVkZEWZvX0n0/IDlHFab397xdBlP4OoE=
github.com/dgrijalva/jwt-go v3.2.0+incompatible h1:7qlOGliEKZXTDg6OTjfoBKDXWrumCAMpl/TFQ4/5kLM=
github.com/dgrijalva/jwt-go v3.2.0+incompatible/go.mod h1:E3ru+11k8xSBh+hMPgOLZmtrrCbhqsmaPHjLKYnJCaQ=
github.com/dgrijalva/jwt-go/v4 v4.0.0-preview1 h1:CaO/zOnF8VvUfEbhRatPcwKVWamvbYd8tQGRWacE9kU=
Expand Down Expand Up @@ -186,9 +188,7 @@ github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeME
github.com/ghodss/yaml v1.0.1-0.20190212211648-25d852aebe32/go.mod h1:GIjDIg/heH5DOkXY3YJ/wNhfHsQHoXGjl8G8amsYQ1I=
github.com/gin-contrib/sse v0.0.0-20190301062529-5545eab6dad3/go.mod h1:VJ0WA2NBN22VlZ2dKZQPAPnyWw5XTlK1KymzLKsr59s=
github.com/gin-gonic/gin v1.4.0/go.mod h1:OW2EZn3DO8Ln9oIKOvM++LBO+5UPHJJDH72/q/3rZdM=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2 h1:Ujru1hufTHVb++eG6OuNDKMxZnGIvF6o/u8q/8h2+I4=
github.com/glycerine/go-unsnap-stream v0.0.0-20181221182339-f9677308dec2/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8=
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24=
github.com/go-check/check v0.0.0-20180628173108-788fd7840127/go.mod h1:9ES+weclKsC9YodN5RgxqK/VD9HM9JsCSh7rNhMZE98=
github.com/go-chi/chi v3.3.2+incompatible/go.mod h1:eB3wogJHnLi3x/kFX2A+IbTBlXxmMeXJVKy9tTv1XzQ=
Expand Down Expand Up @@ -569,8 +569,9 @@ github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXf
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.7.0 h1:nwc3DEeHmmLAfoZucVR881uASk0Mfjw8xYJ99tb5CcY=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/subosito/gotenv v1.2.0 h1:Slr1R9HxAlEKefgq5jn9U+DnETlIUa6HfgEzj0g5d7s=
github.com/subosito/gotenv v1.2.0/go.mod h1:N0PQaV/YGNqwC0u51sEeR/aUtSLEXKX9iv69rRypqCw=
github.com/syndtr/goleveldb v1.0.0/go.mod h1:ZVVdQEZoIme9iO1Ch2Jdy24qqXrMMOU6lpPAyBWyWuQ=
Expand Down