Skip to content

Commit

Permalink
sstable: generate streamlined blockIter code
Browse files Browse the repository at this point in the history
We automatically generate a "streamlined" version of the blockIter code (in
`block_iter_streamlined.gen.go`). The streamlined code can be used in the
common case and does not support all features. This improves performance by
reducing the conditionals in the hot path.

The streamlinedBlockIter constant can be used in if statements; in the
streamlined version, it gets replaced by "true" (allowing the compiler to
omit blocks of code from the streamlined version).
  • Loading branch information
RaduBerinde committed Mar 4, 2024
1 parent 98a8ea9 commit ee9934e
Show file tree
Hide file tree
Showing 5 changed files with 1,134 additions and 6 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ endif
# addressed; see https://github.com/cockroachdb/crlfmt/pull/44
.PHONY: format
format:
go install github.com/cockroachdb/crlfmt@44a36ec7 && crlfmt -w -tab 2 .
go install github.com/cockroachdb/crlfmt@44a36ec7 && crlfmt -w -tab 2 -ignore '\.gen\.go' .

.PHONY: format-check
format-check:
Expand Down
2 changes: 1 addition & 1 deletion internal/lint/lint_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -242,7 +242,7 @@ func TestLint(t *testing.T) {
t.Run("TestCrlfmt", func(t *testing.T) {
t.Parallel()

args := []string{"run", crlfmt, "-fast", "-tab", "2", "."}
args := []string{"run", crlfmt, "-fast", "-tab", "2", "-ignore", `\.gen\.go`, "."}
var buf bytes.Buffer
if err := stream.ForEach(
stream.Sequence(
Expand Down
199 changes: 199 additions & 0 deletions sstable/block-iter-codegen/main.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,199 @@
// Copyright 2024 The LevelDB-Go and Pebble Authors. All rights reserved. Use
// of this source code is governed by a BSD-style license that can be found in
// the LICENSE file.

// This program is used to generate a "streamlined" version of the code in
// block_iter.go. The goal is to remove conditionals from the hot path in the
// common case.
//
// The block_iter.go code uses a special constant streamlinedBlockIter which is
// always false in the code. This program generates streamlined versions of the
// original methods in which the constant is replaced with true. The methods
// that need streamlining are those that use the constant or call (directly or
// indirectly) a method which uses it.
//
// The higher level code decides whether to call the general or streamlined
// version of a method.
package main

import (
"bytes"
"cmp"
"go/ast"
"go/parser"
"go/printer"
"go/token"
"os"
"slices"
"strings"
)

// inputFile is the Go source file that main parses. The path is relative, so
// it is resolved against the working directory the program is run from.
const inputFile = "block_iter.go"
// outputFile is the file that main writes the generated methods to. Like
// inputFile, it is resolved against the working directory.
const outputFile = "block_iter_streamlined.gen.go"
// specialConstant is the identifier that main replaces with "true" in the
// generated methods.
const specialConstant = "streamlinedBlockIter"
// excludedMethod is removed from the methods map before any analysis, so it is
// never streamlined and never counted as a caller of other methods.
// NOTE(review): presumably excluded because initialization is shared by both
// variants - confirm.
const excludedMethod = "init"

// header is the fixed preamble written at the top of the generated file: the
// "Code generated" marker, the package clause and the import list. The import
// list is not derived from the input file.
const header = `// Code generated by block-iter-codegen; DO NOT EDIT.
package sstable
import (
"bytes"
"encoding/binary"
"slices"
"unsafe"
"github.com/cockroachdb/errors"
"github.com/cockroachdb/pebble/internal/base"
"github.com/cockroachdb/pebble/internal/invariants"
"github.com/cockroachdb/pebble/internal/manual"
)
`

// methodInfo holds what the generator tracks for one method of the input file.
type methodInfo struct {
// node is the method's declaration in the parsed AST.
node *ast.FuncDecl
// callers lists the names of the methods that call this method directly. A
// name is appended once per call site, so it may appear more than once.
callers []string
// needsStreamlining is set by DF for methods that must get a streamlined
// copy: those that use the special constant and their direct or indirect
// callers.
needsStreamlining bool
}

// methods is keyed on the method name. The receiver type is not part of the
// key, so two methods with the same name on different types share one entry
// (the one collected last wins). It is populated by CollectMethods.
var methods = map[string]*methodInfo{}

// main parses inputFile, determines which methods need a streamlined variant,
// rewrites those methods in the AST and writes them to outputFile (after the
// fixed header), in the order they appear in the input. Any failure panics.
func main() {
	// Read the source file.
	fset := token.NewFileSet()
	astFile, err := parser.ParseFile(fset, inputFile, nil, parser.ParseComments|parser.SkipObjectResolution)
	if err != nil {
		panic(err)
	}

	CollectMethods(astFile)

	delete(methods, excludedMethod)

	// Populate the caller-callee relationships. Conceptually we build a graph
	// where each relationship is represented as an edge from callee to caller.
	for _, m := range methods {
		RecordCaller(m.node)
	}

	// Mark all methods that directly or indirectly use the special constant.
	for name, m := range methods {
		if UsesSpecialConstant(m.node) {
			DF(name)
		}
	}

	var toOutput []*methodInfo
	for name, m := range methods {
		if !m.needsStreamlining {
			continue
		}
		// Rename method.
		m.node.Name.Name = Streamlined(name)

		// Rename uses of the special constant and any calls to methods that need
		// streamlining. Only this method's own declaration is walked: it is the
		// only part of the file that gets printed for it, and visiting each call
		// exactly once guarantees that a call is never renamed a second time
		// (which could happen if an already-renamed call were looked up again).
		ast.Inspect(m.node, func(n ast.Node) bool {
			switch n := n.(type) {
			case *ast.Ident:
				if n.Name == specialConstant {
					// This is kind of hacky, as "true" wouldn't be an Ident; but it prints
					// out correctly.
					n.Name = "true"
				}
			case *ast.CallExpr:
				// Rename method calls to streamlined functions.
				if s, ok := n.Fun.(*ast.SelectorExpr); ok {
					if callee, ok := methods[s.Sel.Name]; ok && callee.needsStreamlining {
						s.Sel.Name = Streamlined(s.Sel.Name)
					}
				}
			}
			return true
		})

		toOutput = append(toOutput, m)
	}
	// Sort by position in the original file. Map iteration order is random, so
	// this is what makes the output deterministic.
	slices.SortFunc(toOutput, func(a, b *methodInfo) int {
		return cmp.Compare(a.node.Body.Lbrace, b.node.Body.Lbrace)
	})

	var buf bytes.Buffer
	buf.WriteString(header)
	for _, m := range toOutput {
		buf.WriteString("\n")
		// A printing failure would otherwise leave a silently truncated file.
		if err := printer.Fprint(&buf, fset, m.node); err != nil {
			panic(err)
		}
		buf.WriteString("\n")
	}
	if err := os.WriteFile(outputFile, buf.Bytes(), 0666); err != nil {
		panic(err)
	}
}

// CollectMethods registers every method declaration (a function declaration
// that has a receiver) found under root in the methods map, keyed by the
// method's name. Plain functions are ignored.
func CollectMethods(root ast.Node) {
	ast.Inspect(root, func(node ast.Node) bool {
		decl, isFunc := node.(*ast.FuncDecl)
		if !isFunc {
			// Keep descending until we reach function declarations.
			return true
		}
		if decl.Recv != nil {
			methods[decl.Name.Name] = &methodInfo{node: decl}
		}
		// Nothing inside a function declaration is of interest here.
		return false
	})
}

// RecordCaller walks the body of f and, for every call written as a selector
// expression (x.name(...)) whose name matches a known method, appends f's name
// to that method's callers list.
func RecordCaller(f *ast.FuncDecl) {
	caller := f.Name.Name
	ast.Inspect(f, func(node ast.Node) bool {
		call, isCall := node.(*ast.CallExpr)
		if !isCall {
			return true
		}
		sel, isSelector := call.Fun.(*ast.SelectorExpr)
		if !isSelector {
			return true
		}
		if callee, known := methods[sel.Sel.Name]; known {
			callee.callers = append(callee.callers, caller)
		}
		// Keep going: call arguments may contain further calls.
		return true
	})
}

// UsesSpecialConstant reports whether the declaration of f contains an
// identifier named after the special constant. Only f itself is examined;
// methods that f calls are not followed.
func UsesSpecialConstant(f *ast.FuncDecl) bool {
	found := false
	ast.Inspect(f, func(node ast.Node) bool {
		if ident, isIdent := node.(*ast.Ident); isIdent && ident.Name == specialConstant {
			found = true
		}
		return true
	})
	return found
}

// DF sets the needsStreamlining flag on the named method and on every method
// that calls it, directly or through a chain of calls. Names that are not in
// the methods map and methods that are already flagged are skipped, so cycles
// in the call graph terminate.
func DF(name string) {
	// Explicit stack of method names still to be visited.
	pending := []string{name}
	for len(pending) > 0 {
		last := len(pending) - 1
		current := pending[last]
		pending = pending[:last]

		info, known := methods[current]
		if !known || info.needsStreamlining {
			continue
		}
		info.needsStreamlining = true
		pending = append(pending, info.callers...)
	}
}

// Streamlined returns the streamlined version of the method name.
// E.g. "foo" -> "streamlinedFoo"; "Foo" -> "StreamlinedFoo".
//
// The prefix is capitalized when the name starts with an upper-case letter, so
// the result is exported exactly when the input is. Otherwise the first
// character of the name is upper-cased. The first character is taken as a
// whole rune, so names starting with a non-ASCII letter are handled correctly.
// An empty name yields just "streamlined".
func Streamlined(n string) string {
	if n == "" {
		return "streamlined"
	}
	// Find where the first rune ends; ranging over a string yields the byte
	// offset at which each rune starts.
	end := len(n)
	for i := range n {
		if i > 0 {
			end = i
			break
		}
	}
	first, rest := n[:end], n[end:]
	upper := strings.ToUpper(first)
	// The first character is an upper-case letter if upper-casing leaves it
	// alone while lower-casing changes it. Characters without case (such as
	// '_') fail the second test and take the lower-case prefix.
	if upper == first && strings.ToLower(first) != first {
		return "Streamlined" + n
	}
	return "streamlined" + upper + rest
}
22 changes: 18 additions & 4 deletions sstable/block_iter.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,18 @@ import (
"golang.org/x/exp/slices"
)

// We automatically generate a "streamlined" version of the blockIter code (in
// block_iter_streamlined.gen.go). The streamlined code can be used in the
// common case and does not support all features. This improves performance by
// reducing the conditionals in the hot path.
//
// The streamlinedBlockIter constant can be used in if statements; in the
// streamlined version, it gets replaced by "true" (allowing the compiler to
// omit blocks of code from the streamlined version).
//
//go:generate go run ./block-iter-codegen
const streamlinedBlockIter = false

// blockIter is an iterator over a single block of data.
//
// A blockIter provides an additional guarantee around key stability when a
Expand Down Expand Up @@ -222,7 +234,7 @@ func (i *blockIter) init(cmp Compare, split Split, block block, transforms IterT
i.numRestarts = numRestarts
i.ptr = unsafe.Pointer(&block[0])
i.data = block
if i.transforms.SyntheticPrefix != nil {
if !streamlinedBlockIter && i.transforms.SyntheticPrefix != nil {
i.fullKey = append(i.fullKey[:0], i.transforms.SyntheticPrefix...)
} else {
i.fullKey = i.fullKey[:0]
Expand Down Expand Up @@ -345,7 +357,9 @@ func (i *blockIter) readEntry() {
value = uint32(e)<<28 | uint32(d)<<21 | uint32(c)<<14 | uint32(b)<<7 | uint32(a)
ptr = unsafe.Pointer(uintptr(ptr) + 5)
}
shared += uint32(len(i.transforms.SyntheticPrefix))
if !streamlinedBlockIter {
shared += uint32(len(i.transforms.SyntheticPrefix))
}
unsharedKey := getBytes(ptr, int(unshared))
// TODO(sumeer): move this into the else block below.
i.fullKey = append(i.fullKey[:shared], unsharedKey...)
Expand Down Expand Up @@ -422,7 +436,7 @@ func (i *blockIter) readFirstKey() error {
i.firstUserKey = nil
return base.CorruptionErrorf("pebble/table: invalid firstKey in block")
}
if i.transforms.SyntheticPrefix != nil {
if !streamlinedBlockIter && i.transforms.SyntheticPrefix != nil {
i.firstUserKeyWithPrefixBuf = slices.Grow(i.firstUserKeyWithPrefixBuf[:0], len(i.transforms.SyntheticPrefix)+len(i.firstUserKey))
i.firstUserKeyWithPrefixBuf = append(i.firstUserKeyWithPrefixBuf, i.transforms.SyntheticPrefix...)
i.firstUserKeyWithPrefixBuf = append(i.firstUserKeyWithPrefixBuf, i.firstUserKey...)
Expand Down Expand Up @@ -507,7 +521,7 @@ func (i *blockIter) SeekGE(key []byte, flags base.SeekGEFlags) (*InternalKey, ba
panic(errors.AssertionFailedf("invalidated blockIter used"))
}
searchKey := key
if i.transforms.SyntheticPrefix != nil {
if !streamlinedBlockIter && i.transforms.SyntheticPrefix != nil {
// The seek key is before or after the entire block of keys that start with
// SyntheticPrefix. To determine which, we need to compare against a valid
// key in the block. We use firstUserKey which has the synthetic prefix.
Expand Down

0 comments on commit ee9934e

Please sign in to comment.