diff --git a/.changelog/unreleased/improvements/2839-tx_index-lower-heap-allocation.md b/.changelog/unreleased/improvements/2839-tx_index-lower-heap-allocation.md new file mode 100644 index 0000000000..2c76365420 --- /dev/null +++ b/.changelog/unreleased/improvements/2839-tx_index-lower-heap-allocation.md @@ -0,0 +1,2 @@ +- `[state/indexer]` Lower the heap allocation of transaction searches + ([\#2839](https://github.com/cometbft/cometbft/pull/2839)) \ No newline at end of file diff --git a/state/txindex/kv/kv.go b/state/txindex/kv/kv.go index 308c6580fc..d4a33b7779 100644 --- a/state/txindex/kv/kv.go +++ b/state/txindex/kv/kv.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "encoding/hex" + "errors" "fmt" "math/big" "strconv" @@ -21,8 +22,9 @@ import ( ) const ( - tagKeySeparator = "/" - eventSeqSeparator = "$es$" + tagKeySeparator = "/" + tagKeySeparatorRune = '/' + eventSeqSeparator = "$es$" ) var _ txindex.TxIndexer = (*TxIndex)(nil) @@ -331,9 +333,9 @@ func lookForHash(conditions []query.Condition) (hash []byte, ok bool, err error) return } -func (txi *TxIndex) setTmpHashes(tmpHeights map[string][]byte, it dbm.Iterator) { - eventSeq := extractEventSeqFromKey(it.Key()) - tmpHeights[string(it.Value())+eventSeq] = it.Value() +func (*TxIndex) setTmpHashes(tmpHeights map[string][]byte, key, value []byte) { + eventSeq := extractEventSeqFromKey(key) + tmpHeights[string(value)+eventSeq] = value } // match returns all matching txs by hash that meet a given condition and start @@ -370,12 +372,16 @@ func (txi *TxIndex) match( // If we have a height range in a query, we need only transactions // for this height - keyHeight, err := extractHeightFromKey(it.Key()) - if err != nil || !checkHeightConditions(heightInfo, keyHeight) { + key := it.Key() + keyHeight, err := extractHeightFromKey(key) + if err != nil { continue } - - txi.setTmpHashes(tmpHashes, it) + withinBounds := checkHeightConditions(heightInfo, keyHeight) + if !withinBounds { + continue + } + txi.setTmpHashes(tmpHashes, key, it.Value()) // Potentially exit early. select { case <-ctx.Done(): @@ -398,11 +404,16 @@ func (txi *TxIndex) match( EXISTS_LOOP: for ; it.Valid(); it.Next() { - keyHeight, err := extractHeightFromKey(it.Key()) - if err != nil || !checkHeightConditions(heightInfo, keyHeight) { + key := it.Key() + keyHeight, err := extractHeightFromKey(key) + if err != nil { continue } - txi.setTmpHashes(tmpHashes, it) + withinBounds := checkHeightConditions(heightInfo, keyHeight) + if !withinBounds { + continue + } + txi.setTmpHashes(tmpHashes, key, it.Value()) // Potentially exit early. select { @@ -432,11 +443,16 @@ func (txi *TxIndex) match( } if strings.Contains(extractValueFromKey(it.Key()), c.Operand.(string)) { - keyHeight, err := extractHeightFromKey(it.Key()) - if err != nil || !checkHeightConditions(heightInfo, keyHeight) { + key := it.Key() + keyHeight, err := extractHeightFromKey(key) + if err != nil { + continue + } + withinBounds := checkHeightConditions(heightInfo, keyHeight) + if !withinBounds { continue } - txi.setTmpHashes(tmpHashes, it) + txi.setTmpHashes(tmpHashes, key, it.Value()) } // Potentially exit early. @@ -510,29 +526,37 @@ func (txi *TxIndex) matchRange( panic(err) } defer it.Close() + bigIntValue := new(big.Int) LOOP: for ; it.Valid(); it.Next() { - if !isTagKey(it.Key()) { + // TODO: We need to make a function for getting it.Key() as a byte slice with no copies. + // It currently copies the source data (which can change on a subsequent .Next() call) but that + // is not an issue for us. + key := it.Key() + if !isTagKey(key) { continue } if _, ok := qr.AnyBound().(*big.Int); ok { - v := new(big.Int) - eventValue := extractValueFromKey(it.Key()) - v, ok := v.SetString(eventValue, 10) + value := extractValueFromKey(key) + v, ok := bigIntValue.SetString(value, 10) if !ok { continue LOOP } if qr.Key != types.TxHeightKey { - keyHeight, err := extractHeightFromKey(it.Key()) - if err != nil || !checkHeightConditions(heightInfo, keyHeight) { - continue LOOP + keyHeight, err := extractHeightFromKey(key) + if err != nil { + continue + } + withinBounds := checkHeightConditions(heightInfo, keyHeight) + if !withinBounds { + continue } } if checkBounds(qr, v) { - txi.setTmpHashes(tmpHashes, it) + txi.setTmpHashes(tmpHashes, key, it.Value()) } // XXX: passing time in a ABCI Events is not yet implemented @@ -592,29 +616,62 @@ func isTagKey(key []byte) bool { // tags should 4. Alternatively it should be 3 if the event was not indexed // with the corresponding event sequence. However, some attribute values in // production can contain the tag separator. Therefore, the condition is >= 3. - numTags := strings.Count(string(key), tagKeySeparator) - return numTags >= 3 + numTags := 0 + for i := 0; i < len(key); i++ { + if key[i] == tagKeySeparatorRune { + numTags++ + if numTags >= 3 { + return true + } + } + } + return false } func extractHeightFromKey(key []byte) (int64, error) { - parts := strings.SplitN(string(key), tagKeySeparator, -1) + // the height is the second last element in the key. + // Find the position of the last occurrence of tagKeySeparator + endPos := bytes.LastIndexByte(key, tagKeySeparatorRune) + if endPos == -1 { + return 0, errors.New("separator not found") + } - return strconv.ParseInt(parts[len(parts)-2], 10, 64) + // Find the position of the second last occurrence of tagKeySeparator + startPos := bytes.LastIndexByte(key[:endPos-1], tagKeySeparatorRune) + if startPos == -1 { + return 0, errors.New("second last separator not found") + } + + // Extract the height part of the key + height, err := strconv.ParseInt(string(key[startPos+1:endPos]), 10, 64) + if err != nil { + return 0, err + } + return height, nil } -func extractValueFromKey(key []byte) string { - keyString := string(key) - parts := strings.SplitN(keyString, tagKeySeparator, -1) - partsLen := len(parts) - value := strings.TrimPrefix(keyString, parts[0]+tagKeySeparator) - suffix := "" - suffixLen := 2 +func extractValueFromKey(key []byte) string { + // Find the positions of tagKeySeparator in the byte slice + var indices []int + for i, b := range key { + if b == tagKeySeparatorRune { + indices = append(indices, i) + } + } - for i := 1; i <= suffixLen; i++ { - suffix = tagKeySeparator + parts[partsLen-i] + suffix + // If there are less than 2 occurrences of tagKeySeparator, return an empty string + if len(indices) < 2 { + return "" } - return strings.TrimSuffix(value, suffix) + // Extract the value between the first and second last occurrence of tagKeySeparator + value := key[indices[0]+1 : indices[len(indices)-2]] + + // Trim any leading or trailing whitespace + value = bytes.TrimSpace(value) + + // TODO: Do an unsafe cast to avoid an extra allocation here + return string(value) } func extractEventSeqFromKey(key []byte) string { @@ -627,6 +684,7 @@ func extractEventSeqFromKey(key []byte) string { } return "0" } + func keyForEvent(key string, value string, result *abci.TxResult, eventSeq int64) []byte { return []byte(fmt.Sprintf("%s/%s/%d/%d%s", key,