Permalink
Browse files

Make metaTuple backed by []byte (#3746)

This is a version change since there was a bug in the old code; it
didn't include the numLeaves in the rolling hash.

Towards #2270
  • Loading branch information...
arv committed Sep 28, 2017
1 parent d129a98 commit 3317130741c1f2e23ce3a9df2096c3d251708d80
View
@@ -10,7 +10,7 @@ import (
"os"
)
// NomsVersion is the version string for this build of Noms.
// NOTE(review): this is a diff rendering without +/- markers; the "7.15" line appears to be the removed line and "7.16" its replacement (the commit message calls this a version change) — confirm.
const NomsVersion = "7.15"
const NomsVersion = "7.16"
// NOTE(review): presumably the name and value of an environment variable that opts in to the next version — confirm against the code that reads them.
const NOMS_VERSION_NEXT_ENV_NAME = "NOMS_VERSION_NEXT"
const NOMS_VERSION_NEXT_ENV_VALUE = "1"
@@ -64,5 +64,5 @@ func deriveCollectionHeight(c Collection) uint64 {
}
// getRefHeightOfCollection returns the Height of the ref held by the first item of c's sequence.
// The first item is type-asserted to metaTuple, so this panics if it is anything else.
// NOTE(review): diff rendering without +/- markers; the `.ref.Height()` line appears to be the removed field access and `.ref().Height()` the accessor call that replaces it — confirm.
func getRefHeightOfCollection(c Collection) uint64 {
return c.asSequence().getItem(0).(metaTuple).ref.Height()
return c.asSequence().getItem(0).(metaTuple).ref().Height()
}
@@ -64,7 +64,7 @@ func newIndexedMetaSequenceChunkFn(kind NomsKind, vrw ValueReadWriter) makeChunk
for i, v := range items {
mt := v.(metaTuple)
tuples[i] = mt
numLeaves += mt.numLeaves
numLeaves += mt.numLeaves()
}
var col Collection
@@ -81,7 +81,7 @@ func newIndexedMetaSequenceChunkFn(kind NomsKind, vrw ValueReadWriter) makeChunk
// orderedKeyFromSum adds up the numLeaves of every tuple in msd and converts the total to an orderedKey via orderedKeyFromUint64.
// NOTE(review): diff rendering without +/- markers; the `mt.numLeaves` line appears to be the removed field access and `mt.numLeaves()` the accessor call that replaces it — confirm.
func orderedKeyFromSum(msd []metaTuple) orderedKey {
sum := uint64(0)
for _, mt := range msd {
sum += mt.numLeaves
sum += mt.numLeaves()
}
return orderedKeyFromUint64(sum)
}
@@ -111,15 +111,16 @@ func LoadLeafNodes(cols []Collection, startIdx, endIdx uint64) ([]Collection, ui
ms := s.(metaSequence)
for _, mt := range ms.tuples() {
if cum == 0 && mt.numLeaves <= startIdx {
numLeaves := mt.numLeaves()
if cum == 0 && numLeaves <= startIdx {
// skip tuples whose items are < startIdx
startIdx -= mt.numLeaves
endIdx -= mt.numLeaves
startIdx -= numLeaves
endIdx -= numLeaves
continue
}
childTuples = append(childTuples, mt)
cum += mt.numLeaves
cum += numLeaves
if cum >= endIdx {
break
}
@@ -128,7 +129,7 @@ func LoadLeafNodes(cols []Collection, startIdx, endIdx uint64) ([]Collection, ui
hs := make(hash.HashSlice, len(childTuples))
for i, mt := range childTuples {
hs[i] = mt.ref.TargetHash()
hs[i] = mt.ref().TargetHash()
}
// Fetch committed child sequences in a single batch
View
@@ -21,24 +21,55 @@ var emptyKey = orderedKey{}
// newMetaTuple builds a metaTuple from ref, key and numLeaves.
// In the writer-based lines it serializes the ref, the key and the leaf count, in that order, into a fresh binary writer and records the writer offset at which each part starts; the resulting bytes and offsets become the tuple.
// It panics (via d.PanicIfTrue) when ref.buff is nil.
// NOTE(review): diff rendering without +/- markers; `return metaTuple{ref, key, numLeaves}` appears to be the removed line — confirm.
func newMetaTuple(ref Ref, key orderedKey, numLeaves uint64) metaTuple {
d.PanicIfTrue(ref.buff == nil)
return metaTuple{ref, key, numLeaves}
w := newBinaryNomsWriter()
var offsets [metaTuplePartNumLeaves + 1]uint32
// Capture the writer offset before each part is written so the part can be located later.
offsets[metaTuplePartRef] = w.offset
ref.writeTo(&w)
offsets[metaTuplePartKey] = w.offset
key.writeTo(&w)
offsets[metaTuplePartNumLeaves] = w.offset
w.writeCount(numLeaves)
return metaTuple{w.data(), offsets}
}
// metaTuple is a node in a Prolly Tree, consisting of data in the node (either tree leaves or other metaSequences), and a Value annotation for exploring the tree (e.g. the largest item if this is an ordered sequence).
// NOTE(review): diff rendering without +/- markers; ref/key/numLeaves appear to be the removed fields and buff/offsets their replacements — confirm.
type metaTuple struct {
ref Ref
key orderedKey
numLeaves uint64
// buff holds the tuple's serialized bytes: the ref, then the key, then the leaf count (see newMetaTuple and readTuple).
buff []byte
// offsets holds the writer/decoder offset at which each part starts, indexed by the metaTuplePart* constants.
offsets [metaTuplePartNumLeaves + 1]uint32
}
// Indices of the parts of a serialized metaTuple, in the order they are
// written: the ref first, then the key, then the leaf count. They index
// metaTuple.offsets, whose length is metaTuplePartNumLeaves + 1.
const (
	metaTuplePartRef = iota
	metaTuplePartKey
	metaTuplePartNumLeaves
)
// decoderAtPart returns a valueDecoder over mt.buff beginning at the given
// part (one of the metaTuplePart* constants). The stored offsets are rebased
// against the ref part's offset, so they index into mt.buff even when they
// were recorded against a larger buffer (as readTuple does).
func (mt metaTuple) decoderAtPart(part uint32) valueDecoder {
	partBytes := mt.buff[mt.offsets[part]-mt.offsets[metaTuplePartRef]:]
	return newValueDecoder(partBytes, nil)
}
// ref decodes and returns the Ref stored in the ref part of the tuple's buffer.
func (mt metaTuple) ref() Ref {
	refDecoder := mt.decoderAtPart(metaTuplePartRef)
	return refDecoder.readRef()
}
// key decodes and returns the orderedKey stored in the key part of the tuple's buffer.
func (mt metaTuple) key() orderedKey {
	keyDecoder := mt.decoderAtPart(metaTuplePartKey)
	return keyDecoder.readOrderedKey()
}
// numLeaves decodes and returns the leaf count stored in the last part of the tuple's buffer.
func (mt metaTuple) numLeaves() uint64 {
	countDecoder := mt.decoderAtPart(metaTuplePartNumLeaves)
	return countDecoder.readCount()
}
// getChildSequence resolves the tuple's ref through vr and returns the target's sequence.
// The target value is type-asserted to Collection, so this panics if it is anything else.
// NOTE(review): diff rendering without +/- markers; the `mt.ref.TargetValue` line appears to be the removed field access and `mt.ref().TargetValue` the accessor call that replaces it — confirm.
func (mt metaTuple) getChildSequence(vr ValueReader) sequence {
return mt.ref.TargetValue(vr).(Collection).asSequence()
return mt.ref().TargetValue(vr).(Collection).asSequence()
}
// writeTo serializes the tuple to w.
// NOTE(review): diff rendering without +/- markers; the three field-by-field writes (ref, key, count) appear to be the removed lines, replaced by a single raw write of the already-serialized mt.buff — confirm.
func (mt metaTuple) writeTo(w nomsWriter) {
mt.ref.writeTo(w)
mt.key.writeTo(w)
w.writeCount(mt.numLeaves)
w.writeRaw(mt.buff)
}
// orderedKey is a key in a Prolly Tree level, which is a metaTuple in a metaSequence, or a value in a leaf sequence.
@@ -85,7 +116,7 @@ func (key orderedKey) Less(mk2 orderedKey) bool {
func (key orderedKey) writeTo(w nomsWriter) {
if !key.isOrderedByValue {
// See https://github.com/attic-labs/noms/issues/1688#issuecomment-227528987
d.PanicIfTrue(key.h.IsEmpty())
d.PanicIfTrue(key != emptyKey && key.h.IsEmpty())
writeRefPartsTo(w, key.h, BoolType, 0)
} else {
key.v.writeTo(w)
@@ -190,10 +221,16 @@ func (ms metaSequence) getCompareFn(other sequence) compareFn {
}
// readTuple reads one metaTuple from dec, advancing the decoder past it.
// In the offset-based lines it skips over the ref, the key and the count, recording dec.offset before each, and returns a metaTuple whose buff is dec.byteSlice(start, end) and whose offsets are those recorded decoder offsets.
// NOTE(review): diff rendering without +/- markers; the readRef/readOrderedKey/readCount lines and the newMetaTuple return appear to be the removed lines — confirm.
func (ms metaSequence) readTuple(dec *valueDecoder) metaTuple {
ref := dec.readRef()
key := dec.readOrderedKey()
numLeaves := dec.readCount()
return newMetaTuple(ref, key, numLeaves)
var offsets [metaTuplePartNumLeaves + 1]uint32
start := dec.offset
offsets[metaTuplePartRef] = start
dec.skipRef()
offsets[metaTuplePartKey] = dec.offset
dec.skipValue()
offsets[metaTuplePartNumLeaves] = dec.offset
dec.skipCount()
end := dec.offset
// The offsets are positions in dec's buffer, not in the returned slice; decoderAtPart rebases them.
return metaTuple{dec.byteSlice(start, end), offsets}
}
func (ms metaSequence) getRefAt(dec *valueDecoder, idx int) Ref {
@@ -259,7 +296,7 @@ func (ms metaSequence) Len() uint64 {
func (ms metaSequence) getChildSequence(idx int) sequence {
mt := ms.getItem(idx).(metaTuple)
// TODO: IsZeroValue?
if mt.ref.buff == nil {
if mt.buff == nil {
return nil
}
return mt.getChildSequence(ms.vrw)
@@ -336,16 +373,7 @@ func (ms metaSequence) getChildren(start, end uint64) (seqs []sequence) {
}
// metaHashValueBytes feeds a metaTuple sequence item into the rolling hasher rv.
// item is type-asserted to metaTuple, so this panics if it is anything else.
// NOTE(review): diff rendering without +/- markers; everything from `mt := ...` through the two hashValueBytes calls appears to be the removed implementation, which hashed only the ref and the key. The final rv.hashBytes line appears to be its replacement; it hashes the tuple's whole serialized buffer, which per newMetaTuple also contains numLeaves — confirm.
func metaHashValueBytes(item sequenceItem, rv *rollingValueHasher) {
mt := item.(metaTuple)
v := mt.key.v
if !mt.key.isOrderedByValue {
// See https://github.com/attic-labs/noms/issues/1688#issuecomment-227528987
d.PanicIfTrue(mt.key.h.IsEmpty())
v = constructRef(mt.key.h, BoolType, 0)
}
hashValueBytes(mt.ref, rv)
hashValueBytes(v, rv)
rv.hashBytes(item.(metaTuple).buff)
}
type emptySequence struct {
@@ -95,10 +95,11 @@ func newOrderedMetaSequenceChunkFn(kind NomsKind, vrw ValueReadWriter) makeChunk
var lastKey orderedKey
for i, v := range items {
mt := v.(metaTuple)
d.PanicIfFalse(lastKey == emptyKey || lastKey.Less(mt.key))
lastKey = mt.key
key := mt.key()
d.PanicIfFalse(lastKey == emptyKey || lastKey.Less(key))
lastKey = key
tuples[i] = mt // chunk is written when the root sequence is written
numLeaves += mt.numLeaves
numLeaves += mt.numLeaves()
}
var col Collection
@@ -109,6 +110,6 @@ func newOrderedMetaSequenceChunkFn(kind NomsKind, vrw ValueReadWriter) makeChunk
col = newMap(newMapMetaSequence(level, tuples, vrw))
}
return col, tuples[len(tuples)-1].key, numLeaves
return col, tuples[len(tuples)-1].key(), numLeaves
}
}
@@ -99,3 +99,8 @@ func (rv *rollingValueHasher) HashValue(v Value) {
v.writeTo(&rv.bw)
rv.sl.Update(rv.bw.data())
}
// hashBytes appends buff verbatim to the hasher's writer and then passes the
// writer's data to rv.sl.Update, mirroring what HashValue does for a Value.
func (rv *rollingValueHasher) hashBytes(buff []byte) {
	rv.bw.writeRaw(buff)
	written := rv.bw.data()
	rv.sl.Update(written)
}
View
@@ -334,7 +334,7 @@ func isMetaSequenceSubtypeOf(ms metaSequence, t *Type, hasExtra bool) (bool, boo
// TODO: iterRefs
for _, mt := range ms.tuples() {
// Each prolly tree is also a List<T> where T needs to be a subtype.
isSub, hasMore := isSubtypeTopLevel(t, mt.ref.TargetType())
isSub, hasMore := isSubtypeTopLevel(t, mt.ref().TargetType())
if !isSub {
return false, hasExtra
}
Binary file not shown.
@@ -1 +1 @@
4:7.15:8s92pdafhd4hkhav6r4748u1rjlosh1k:5b1e9knhol2orv0a8ej6tvelc46jp92l:bsvid54jt8pjto211lcdl14tbfd39jmn:2:998se5i5mf15fld7f318818i6ie0c8rr:2
4:7.16:8s92pdafhd4hkhav6r4748u1rjlosh1k:5b1e9knhol2orv0a8ej6tvelc46jp92l:bsvid54jt8pjto211lcdl14tbfd39jmn:2:998se5i5mf15fld7f318818i6ie0c8rr:2

0 comments on commit 3317130

Please sign in to comment.