Skip to content

Commit

Permalink
map: zero-allocation operations for common types
Browse files Browse the repository at this point in the history
Map keys and values are currently marshaled into []byte by souped
up versions of binary.Write and binary.Read. This allows users to
be blissfully unaware of compiler inserted padding on the Go side.
This is wasteful in case the Go in-memory representation matches
what the kernel expects because we need additional allocations.

Refactor syscall marshaling into a new package sysenc which
encapsulates the logic we need to determine whether a Go type
is safe for zero-allocation / zero-copy marshaling. The type
must be a pointer to or a slice of:

* A primitive type like uint32, ... or
* An array of valid types or
* A struct made up of valid types without any compiler
  inserted padding between fields

Per-CPU maps don't support zero-allocation operations for now,
but the new code already makes things a little bit cheaper.

Structs with trailing padding also don't benefit from the
optimization for now. Consider

    type padded struct { A uint32; B uint16 }

Allowing such a type creates an edge case: make([]padding, 1)
uses zero-allocation marshaling while make([]padding, 2)
doesn't, due to interior padding. It's simpler to skip such
types for now.

    goos: linux
    goarch: amd64
    pkg: github.com/cilium/ebpf
    cpu: 12th Gen Intel(R) Core(TM) i7-1260P
                                         │ unsafe.txt  │
                                         │   sec/op    │
    Marshaling/ValueUnmarshalReflect-16    356.1n ± 2%
    Marshaling/KeyMarshalReflect-16        368.6n ± 1%
    Marshaling/ValueBinaryUnmarshaler-16   378.6n ± 2%
    Marshaling/KeyBinaryMarshaler-16       356.2n ± 1%
    Marshaling/KeyValueUnsafe-16           328.0n ± 2%
    PerCPUMarshalling/reflection-16        1.232µ ± 1%

                                         │  unsafe.txt  │
                                         │     B/op     │
    Marshaling/ValueUnmarshalReflect-16    0.000 ± 0%
    Marshaling/KeyMarshalReflect-16        0.000 ± 0%
    Marshaling/ValueBinaryUnmarshaler-16   24.00 ± 0%
    Marshaling/KeyBinaryMarshaler-16       8.000 ± 0%
    Marshaling/KeyValueUnsafe-16           0.000 ± 0%
    PerCPUMarshalling/reflection-16        280.0 ± 0%

                                         │  unsafe.txt  │
                                         │  allocs/op   │
    Marshaling/ValueUnmarshalReflect-16    0.000 ± 0%
    Marshaling/KeyMarshalReflect-16        0.000 ± 0%
    Marshaling/ValueBinaryUnmarshaler-16   1.000 ± 0%
    Marshaling/KeyBinaryMarshaler-16       1.000 ± 0%
    Marshaling/KeyValueUnsafe-16           0.000 ± 0%
    PerCPUMarshalling/reflection-16        3.000 ± 0%

Signed-off-by: Lorenz Bauer <lmb@isovalent.com>
  • Loading branch information
lmb authored and ti-mo committed Sep 14, 2023
1 parent 74e1d7b commit 4609dc7
Show file tree
Hide file tree
Showing 15 changed files with 719 additions and 196 deletions.
5 changes: 3 additions & 2 deletions collection.go
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import (
"github.com/cilium/ebpf/btf"
"github.com/cilium/ebpf/internal"
"github.com/cilium/ebpf/internal/kconfig"
"github.com/cilium/ebpf/internal/sysenc"
)

// CollectionOptions control loading a collection into the kernel.
Expand Down Expand Up @@ -175,12 +176,12 @@ func (cs *CollectionSpec) RewriteConstants(consts map[string]interface{}) error
return fmt.Errorf("section %s: offset %d(+%d) for variable %s is out of bounds", name, v.Offset, v.Size, vname)
}

b, err := marshalBytes(replacement, int(v.Size))
b, err := sysenc.Marshal(replacement, int(v.Size))
if err != nil {
return fmt.Errorf("marshaling constant replacement %s: %w", vname, err)
}

copy(cpy[v.Offset:v.Offset+v.Size], b)
b.CopyTo(cpy[v.Offset : v.Offset+v.Size])

replaced[vname] = true
}
Expand Down
2 changes: 1 addition & 1 deletion internal/endian_be.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import "encoding/binary"

// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness.
var NativeEndian binary.ByteOrder = binary.BigEndian
var NativeEndian = binary.BigEndian

// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
const ClangEndian = "eb"
2 changes: 1 addition & 1 deletion internal/endian_le.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ import "encoding/binary"

// NativeEndian is set to either binary.BigEndian or binary.LittleEndian,
// depending on the host's endianness.
var NativeEndian binary.ByteOrder = binary.LittleEndian
var NativeEndian = binary.LittleEndian

// ClangEndian is set to either "el" or "eb" depending on the host's endianness.
const ClangEndian = "el"
77 changes: 77 additions & 0 deletions internal/sysenc/buffer.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
package sysenc

import (
"unsafe"

"github.com/cilium/ebpf/internal/sys"
)

type Buffer struct {
ptr unsafe.Pointer
// Size of the buffer. syscallPointerOnly if created from UnsafeBuffer or when using
// zero-copy unmarshaling.
size int
}

const syscallPointerOnly = -1

func newBuffer(buf []byte) Buffer {
if len(buf) == 0 {
return Buffer{}
}
return Buffer{unsafe.Pointer(&buf[0]), len(buf)}
}

// UnsafeBuffer constructs a Buffer for zero-copy unmarshaling.
//
// [Pointer] is the only valid method to call on such a Buffer.
// Use [SyscallBuffer] instead if possible.
func UnsafeBuffer(ptr unsafe.Pointer) Buffer {
return Buffer{ptr, syscallPointerOnly}
}

// SyscallOutput prepares a Buffer for a syscall to write into.
//
// The buffer may point at the underlying memory of dst, in which case [Unmarshal]
// becomes a no-op.
//
// The contents of the buffer are undefined and may be non-zero.
func SyscallOutput(dst any, size int) Buffer {
if dstBuf := unsafeBackingMemory(dst); len(dstBuf) == size {
buf := newBuffer(dstBuf)
buf.size = syscallPointerOnly
return buf
}

return newBuffer(make([]byte, size))
}

// CopyTo copies the buffer into dst.
//
// Returns the number of copied bytes.
func (b Buffer) CopyTo(dst []byte) int {
return copy(dst, b.unsafeBytes())
}

// Pointer returns the location where a syscall should write.
func (b Buffer) Pointer() sys.Pointer {
// NB: This deliberately ignores b.length to support zero-copy
// marshaling / unmarshaling using unsafe.Pointer.
return sys.NewPointer(b.ptr)
}

// Unmarshal the buffer into the provided value.
func (b Buffer) Unmarshal(data any) error {
if b.size == syscallPointerOnly {
return nil
}

return Unmarshal(data, b.unsafeBytes())
}

func (b Buffer) unsafeBytes() []byte {
if b.size == syscallPointerOnly {
return nil
}
return unsafe.Slice((*byte)(b.ptr), b.size)
}
27 changes: 27 additions & 0 deletions internal/sysenc/buffer_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
package sysenc_test

import (
"testing"
"unsafe"

"github.com/cilium/ebpf/internal/sys"
"github.com/cilium/ebpf/internal/sysenc"
qt "github.com/frankban/quicktest"
)

func TestZeroBuffer(t *testing.T) {
var zero sysenc.Buffer

qt.Assert(t, zero.CopyTo(make([]byte, 1)), qt.Equals, 0)
qt.Assert(t, zero.Pointer(), qt.Equals, sys.Pointer{})
qt.Assert(t, zero.Unmarshal(new(uint16)), qt.IsNotNil)
}

func TestUnsafeBuffer(t *testing.T) {
ptr := unsafe.Pointer(new(uint16))
buf := sysenc.UnsafeBuffer(ptr)

qt.Assert(t, buf.CopyTo(make([]byte, 1)), qt.Equals, 0)
qt.Assert(t, buf.Pointer(), qt.Equals, sys.NewPointer(ptr))
qt.Assert(t, buf.Unmarshal(new(uint16)), qt.IsNil)
}
3 changes: 3 additions & 0 deletions internal/sysenc/doc.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Package sysenc provides efficient conversion of Go values to system
// call interfaces.
package sysenc
41 changes: 41 additions & 0 deletions internal/sysenc/layout.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found at https://go.dev/LICENSE.

package sysenc

import (
"reflect"
"sync"
)

var hasUnexportedFieldsCache sync.Map // map[reflect.Type]bool

func hasUnexportedFields(typ reflect.Type) bool {
switch typ.Kind() {
case reflect.Slice, reflect.Array, reflect.Pointer:
return hasUnexportedFields(typ.Elem())

case reflect.Struct:
if unexported, ok := hasUnexportedFieldsCache.Load(typ); ok {
return unexported.(bool)
}

unexported := false
for i, n := 0, typ.NumField(); i < n; i++ {
field := typ.Field(i)
// Package binary allows _ fields but always writes zeroes into them.
if (!field.IsExported() && field.Name != "_") || hasUnexportedFields(field.Type) {
unexported = true
break
}
}

hasUnexportedFieldsCache.Store(typ, unexported)
return unexported

default:
// NB: It's not clear what this means for Chan and so on.
return false
}
}
37 changes: 37 additions & 0 deletions internal/sysenc/layout_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
package sysenc

import (
"fmt"
"reflect"
"testing"

qt "github.com/frankban/quicktest"
)

func TestHasUnexportedFields(t *testing.T) {
for _, test := range []struct {
value any
result bool
}{
{struct{ A any }{}, false},
{(*struct{ A any })(nil), false},
{([]struct{ A any })(nil), false},
{[1]struct{ A any }{}, false},
{struct{ _ any }{}, false},
{struct{ _ struct{ a any } }{}, true},
{(*struct{ _ any })(nil), false},
{([]struct{ _ any })(nil), false},
{[1]struct{ _ any }{}, false},
{struct{ a any }{}, true},
{(*struct{ a any })(nil), true},
{([]struct{ a any })(nil), true},
{[1]struct{ a any }{}, true},
{(*struct{ A []struct{ a any } })(nil), true},
{(*struct{ A [1]struct{ a any } })(nil), true},
} {
t.Run(fmt.Sprintf("%T", test.value), func(t *testing.T) {
have := hasUnexportedFields(reflect.TypeOf(test.value))
qt.Assert(t, have, qt.Equals, test.result)
})
}
}
163 changes: 163 additions & 0 deletions internal/sysenc/marshal.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
package sysenc

import (
"bytes"
"encoding"
"encoding/binary"
"errors"
"fmt"
"reflect"
"sync"
"unsafe"

"github.com/cilium/ebpf/internal"
)

// Marshal turns data into a byte slice using the system's native endianness.
//
// If possible, avoids allocations by directly using the backing memory
// of data. This means that the variable must not be modified for the lifetime
// of the returned [Buffer].
//
// Returns an error if the data can't be turned into a byte slice according to
// the behaviour of [binary.Write].
func Marshal(data any, size int) (Buffer, error) {
if data == nil {
return Buffer{}, errors.New("can't marshal a nil value")
}

var buf []byte
var err error
switch value := data.(type) {
case encoding.BinaryMarshaler:
buf, err = value.MarshalBinary()
case string:
buf = unsafe.Slice(unsafe.StringData(value), len(value))
case []byte:
buf = value
case int16:
buf = internal.NativeEndian.AppendUint16(make([]byte, 0, 2), uint16(value))
case uint16:
buf = internal.NativeEndian.AppendUint16(make([]byte, 0, 2), value)
case int32:
buf = internal.NativeEndian.AppendUint32(make([]byte, 0, 4), uint32(value))
case uint32:
buf = internal.NativeEndian.AppendUint32(make([]byte, 0, 4), value)
case int64:
buf = internal.NativeEndian.AppendUint64(make([]byte, 0, 8), uint64(value))
case uint64:
buf = internal.NativeEndian.AppendUint64(make([]byte, 0, 8), value)
default:
if buf := unsafeBackingMemory(data); len(buf) == size {
return newBuffer(buf), nil
}

wr := internal.NewBuffer(make([]byte, 0, size))
defer internal.PutBuffer(wr)

err = binary.Write(wr, internal.NativeEndian, value)
buf = wr.Bytes()
}
if err != nil {
return Buffer{}, err
}

if len(buf) != size {
return Buffer{}, fmt.Errorf("%T doesn't marshal to %d bytes", data, size)
}

return newBuffer(buf), nil
}

var bytesReaderPool = sync.Pool{
New: func() interface{} {
return new(bytes.Reader)
},
}

// Unmarshal a byte slice in the system's native endianness into data.
//
// Returns an error if buf can't be unmarshalled according to the behaviour
// of [binary.Read].
func Unmarshal(data interface{}, buf []byte) error {
switch value := data.(type) {
case encoding.BinaryUnmarshaler:
return value.UnmarshalBinary(buf)

case *string:
*value = string(buf)
return nil

default:
if dataBuf := unsafeBackingMemory(data); len(dataBuf) == len(buf) {
copy(dataBuf, buf)
return nil
}

rd := bytesReaderPool.Get().(*bytes.Reader)
defer bytesReaderPool.Put(rd)

rd.Reset(buf)

return binary.Read(rd, internal.NativeEndian, value)
}
}

// unsafeBackingMemory returns the backing memory of data if it can be used
// instead of calling into package binary.
//
// Returns nil if the value is not a pointer or a slice, or if it contains
// padding or unexported fields.
func unsafeBackingMemory(data any) []byte {
if data == nil {
return nil
}

value := reflect.ValueOf(data)
var valueSize int
switch value.Kind() {
case reflect.Pointer:
if value.IsNil() {
return nil
}

if elemType := value.Type().Elem(); elemType.Kind() != reflect.Slice {
valueSize = int(elemType.Size())
break
}

// We're dealing with a pointer to a slice. Dereference and
// handle it like a regular slice.
value = value.Elem()
fallthrough

case reflect.Slice:
valueSize = int(value.Type().Elem().Size()) * value.Len()

default:
// Prevent Value.UnsafePointer from panicking.
return nil
}

// Some nil pointer types currently crash binary.Size. Call it after our own
// code so that the panic isn't reachable.
// See https://github.com/golang/go/issues/60892
if size := binary.Size(data); size == -1 || size != valueSize {
// The type contains padding or unsupported types.
return nil
}

if hasUnexportedFields(reflect.TypeOf(data)) {
return nil
}

// Reinterpret the pointer as a byte slice. This violates the unsafe.Pointer
// rules because it's very unlikely that the source data has "an equivalent
// memory layout". However, we can make it safe-ish because of the
// following reasons:
// - There is no alignment mismatch since we cast to a type with an
// alignment of 1.
// - There are no pointers in the source type so we don't upset the GC.
// - The length is verified at runtime.
return unsafe.Slice((*byte)(value.UnsafePointer()), valueSize)
}

0 comments on commit 4609dc7

Please sign in to comment.