Skip to content

Commit

Permalink
Add some experiments with schemas
Browse files Browse the repository at this point in the history
  • Loading branch information
simonswine committed Jul 11, 2022
1 parent 91a5511 commit 68da7be
Show file tree
Hide file tree
Showing 3 changed files with 294 additions and 0 deletions.
18 changes: 18 additions & 0 deletions pkg/firedb/schemas/schema_test.go
@@ -0,0 +1,18 @@
package schema

import (
"testing"

"github.com/grafana/fire/pkg/firedb"
v1 "github.com/grafana/fire/pkg/firedb/schemas/v1"
"github.com/segmentio/parquet-go"
"github.com/stretchr/testify/require"
)

// TestSchema asserts that the hand-written v1 schema is byte-for-byte
// identical (in its printed form) to the schema parquet-go derives
// automatically from the firedb.Profile struct.
func TestSchema(t *testing.T) {
	derived := parquet.SchemaOf(&firedb.Profile{})
	handwritten := v1.ProfilesSchema()
	require.Equal(t, derived.String(), handwritten.String())
}
162 changes: 162 additions & 0 deletions pkg/firedb/schemas/v1/schema.go
@@ -0,0 +1,162 @@
package v1

import (
"reflect"
"strings"
"unicode"
"unicode/utf8"

"github.com/segmentio/parquet-go"
"github.com/segmentio/parquet-go/compress"
"github.com/segmentio/parquet-go/deprecated"
"github.com/segmentio/parquet-go/encoding"
"github.com/segmentio/parquet-go/format"
)

// Group is an ordered list of named parquet fields implementing
// parquet.Node. Unlike parquet.Group (a map), the slice preserves
// declaration order, which fixes the column order of the resulting
// schema — important when comparing against a struct-derived schema.
type Group []*groupField

// String renders the group as a printed parquet schema definition.
func (g Group) String() string {
	var sb strings.Builder
	parquet.PrintSchema(&sb, "", g)
	return sb.String()
}

// Type returns the synthetic non-leaf group type.
func (g Group) Type() parquet.Type { return &groupType{} }

// Optional reports whether the node is optional; groups never are.
func (g Group) Optional() bool { return false }

// Repeated reports whether the node is repeated; groups never are.
func (g Group) Repeated() bool { return false }

// Required reports whether the node is required; groups always are.
func (g Group) Required() bool { return true }

// Leaf reports whether the node is a leaf; groups are interior nodes.
func (g Group) Leaf() bool { return false }

// Fields returns the group's child fields in declaration order.
func (g Group) Fields() []parquet.Field {
	out := make([]parquet.Field, len(g))
	for i, f := range g {
		out[i] = f
	}
	return out
}

// Encoding returns nil: a group node carries no encoding of its own.
func (g Group) Encoding() encoding.Encoding { return nil }

// Compression returns nil: a group node carries no compression codec.
func (g Group) Compression() compress.Codec { return nil }

// GoType returns a reflect struct type mirroring the group's fields.
func (g Group) GoType() reflect.Type { return goTypeOfGroup(g) }

// exportedStructFieldName upper-cases the first rune of name so that the
// result is a valid exported Go struct field name.
//
// An empty name is returned unchanged: without the guard,
// utf8.DecodeRuneInString("") yields (utf8.RuneError, 0) and the function
// would fabricate a spurious U+FFFD replacement character.
func exportedStructFieldName(name string) string {
	if name == "" {
		return name
	}
	firstRune, size := utf8.DecodeRuneInString(name)
	return string(unicode.ToUpper(firstRune)) + name[size:]
}

// goTypeOfGroup builds a reflect struct type whose fields mirror the
// node's parquet fields, with names upper-cased so they are exported.
func goTypeOfGroup(node parquet.Node) reflect.Type {
	children := node.Fields()
	members := make([]reflect.StructField, len(children))
	for i, child := range children {
		// TODO: can we reconstruct a struct tag that would be valid if a
		// value of this type were passed to SchemaOf?
		members[i] = reflect.StructField{
			Name: exportedStructFieldName(child.Name()),
			Type: child.GoType(),
		}
	}
	return reflect.StructOf(members)
}

// groupField pairs a parquet node with the name it is exposed under
// inside a Group. The embedded Node supplies the full parquet.Node
// behavior; groupField overrides only Name and Value (see below) to
// satisfy parquet.Field.
type groupField struct {
	parquet.Node
	name string
}

// groupType is the parquet.Type implementation backing Group nodes.
// Group (non-leaf) nodes have no physical representation, so every
// value-level operation panics and the metadata accessors return zero
// values. The receivers are uniformly unnamed: no method uses state
// (the originals mixed `(t groupType)` and `(groupType)` forms, which
// is flagged by linters as an unused receiver / inconsistency).
type groupType struct{}

// String identifies the node as a group in printed schemas.
func (groupType) String() string { return "group" }

// Kind panics: group nodes have no physical kind.
func (groupType) Kind() parquet.Kind {
	panic("cannot call Kind on parquet group")
}

// Compare panics: group nodes hold no comparable values.
func (groupType) Compare(parquet.Value, parquet.Value) int {
	panic("cannot compare values on parquet group")
}

// NewColumnIndexer panics: groups are not columns.
func (groupType) NewColumnIndexer(int) parquet.ColumnIndexer {
	panic("cannot create column indexer from parquet group")
}

// NewDictionary panics: groups have no dictionary encoding.
func (groupType) NewDictionary(int, int, []byte) parquet.Dictionary {
	panic("cannot create dictionary from parquet group")
}

// NewColumnBuffer panics: groups are not columns.
func (groupType) NewColumnBuffer(int, int) parquet.ColumnBuffer {
	panic("cannot create column buffer from parquet group")
}

// NewPage panics: groups hold no pages of values.
func (groupType) NewPage(int, int, []byte) parquet.Page {
	panic("cannot create page from parquet group")
}

// Encode panics: groups carry no encodable data.
func (groupType) Encode(_, _ []byte, _ encoding.Encoding) ([]byte, error) {
	panic("cannot encode parquet group")
}

// Decode panics: groups carry no decodable data.
func (groupType) Decode(_, _ []byte, _ encoding.Encoding) ([]byte, error) {
	panic("cannot decode parquet group")
}

// Length returns 0: groups have no fixed byte length.
func (groupType) Length() int { return 0 }

// EstimateSize returns 0: groups occupy no value storage themselves.
func (groupType) EstimateSize(int) int64 { return 0 }

// ColumnOrder returns nil: ordering metadata applies to leaves only.
func (groupType) ColumnOrder() *format.ColumnOrder { return nil }

// PhysicalType returns nil: groups have no physical type.
func (groupType) PhysicalType() *format.Type { return nil }

// LogicalType returns nil: groups have no logical type.
func (groupType) LogicalType() *format.LogicalType { return nil }

// ConvertedType returns nil: groups have no (deprecated) converted type.
func (groupType) ConvertedType() *deprecated.ConvertedType { return nil }

// Name returns the field's name within its enclosing Group.
func (f *groupField) Name() string { return f.name }

// Value looks the field up by name in a map-backed base value.
// NOTE(review): the &name/Elem round-trip produces an addressable
// reflect.Value for the key — presumably mirroring parquet-go's own
// map lookup; confirm whether plain reflect.ValueOf(f.name) suffices.
func (f *groupField) Value(base reflect.Value) reflect.Value {
	key := reflect.ValueOf(&f.name).Elem()
	return base.MapIndex(key)
}

// ProfilesSchema constructs the v1 parquet schema for stored profiles.
// It is intended to match parquet.SchemaOf(&firedb.Profile{}) exactly
// (see schema_test.go), so the field order inside each Group literal is
// significant — do not reorder entries.
func ProfilesSchema() *parquet.Schema {
	// String references: delta-encoded int64 indexes rather than inline strings.
	stringRef := parquet.Encoded(parquet.Int(64), &parquet.DeltaBinaryPacked)

	// A pprof value-type pair. parquet.Group is a map, so member order
	// here does not affect the schema.
	sampleType := parquet.Group{
		"Type": stringRef,
		"Unit": stringRef,
	}

	// Labels attached to the whole profile (name/value pairs).
	externalLabels := parquet.Repeated(Group{
		{name: "Name", Node: stringRef},
		{name: "Value", Node: stringRef},
	})

	// Per-sample pprof labels; Str/Num/NumUnit are optional.
	pprofLabels := parquet.Repeated(Group{
		{name: "Key", Node: stringRef},
		{name: "Str", Node: parquet.Optional(stringRef)},
		{name: "Num", Node: parquet.Optional(parquet.Int(64))},
		{name: "NumUnit", Node: parquet.Optional(stringRef)},
	})

	// One stack-trace sample: its location IDs, one value per sample
	// type, and any pprof labels.
	samples := parquet.Repeated(Group{
		{name: "LocationIds", Node: parquet.Repeated(parquet.Uint(64))},
		{name: "Values", Node: parquet.Repeated(parquet.Encoded(parquet.Int(64), &parquet.DeltaBinaryPacked))},
		{name: "Labels", Node: pprofLabels},
	})

	return parquet.NewSchema("Profile", Group{
		{name: "ID", Node: parquet.UUID()},
		{name: "ExternalLabels", Node: externalLabels},
		{name: "Types", Node: parquet.Repeated(sampleType)},
		{name: "Samples", Node: samples},
		{name: "DropFrames", Node: stringRef},
		{name: "KeepFrames", Node: stringRef},
		{name: "TimeNanos", Node: parquet.Timestamp(parquet.Nanosecond)},
		{name: "DurationNanos", Node: parquet.Int(64)},
		{name: "PeriodType", Node: parquet.Optional(sampleType)},
		{name: "Period", Node: parquet.Int(64)},
		{name: "Comments", Node: parquet.Repeated(stringRef)},
		{name: "DefaultSampleType", Node: parquet.Int(64)},
	})
}
114 changes: 114 additions & 0 deletions pkg/firedb/schemas/v2/schema.go
@@ -0,0 +1,114 @@
package v2

import (
"github.com/polarsignals/frostdb/dynparquet"
"github.com/segmentio/parquet-go"
)

// Column names of the v2 profiles table. Declaring them as constants
// lets the schema definition and its sorting columns (see Profiles
// below) reference a single spelling.
//
// NOTE(review): ColumnComments is declared but not referenced by
// Profiles below — presumably reserved for a future column; confirm.
const (
	ColumnID            = "id"
	ColumnLabels        = "labels"
	ColumnSampleType    = "sample_type"
	ColumnSampleUnit    = "sample_unit"
	ColumnLocationIDs   = "location_ids"
	ColumnSamples       = "samples"
	ColumnPprofLabels   = "pprof_labels"
	ColumnDropFrames    = "drop_frames"
	ColumnKeepFrames    = "keep_frames"
	ColumnTimeNanos     = "time_nanos"
	ColumnDurationNanos = "duration_nanos"
	ColumnPeriod        = "period"
	ColumnPeriodType    = "period_type"
	ColumnPeriodUnit    = "period_unit"
	ColumnComments      = "comments"
)

// Profiles builds the dynparquet schema for the v2 profiles table,
// listing its column definitions and the sort order rows are stored in.
func Profiles() *dynparquet.Schema {
	// String references: delta-encoded int64 indexes rather than inline strings.
	stringRef := parquet.Encoded(parquet.Int(64), &parquet.DeltaBinaryPacked)

	// Profile-level labels (name/value pairs).
	labels := parquet.Repeated(parquet.Group{
		"Name":  stringRef,
		"Value": stringRef,
	})

	// Per-sample pprof labels; Str/Num/NumUnit are optional.
	pprofLabels := parquet.Repeated(parquet.Group{
		"Key":     stringRef,
		"Str":     parquet.Optional(stringRef),
		"Num":     parquet.Optional(parquet.Int(64)),
		"NumUnit": parquet.Optional(stringRef),
	})

	columns := []dynparquet.ColumnDefinition{
		{Name: ColumnID, StorageLayout: parquet.Int(64), Dynamic: false},
		{Name: ColumnLabels, StorageLayout: labels, Dynamic: false},
		{Name: ColumnSampleType, StorageLayout: stringRef, Dynamic: false},
		{Name: ColumnSampleUnit, StorageLayout: stringRef, Dynamic: false},
		// NOTE(review): location_ids is a repeated repeated column (list
		// of lists) — confirm the double nesting is intended.
		{Name: ColumnLocationIDs, StorageLayout: parquet.Repeated(parquet.Repeated(parquet.Uint(64))), Dynamic: false},
		{Name: ColumnSamples, StorageLayout: parquet.Repeated(parquet.Encoded(parquet.Int(64), &parquet.DeltaBinaryPacked)), Dynamic: false},
		{Name: ColumnPprofLabels, StorageLayout: pprofLabels, Dynamic: false},
		{Name: ColumnDropFrames, StorageLayout: stringRef, Dynamic: false},
		{Name: ColumnKeepFrames, StorageLayout: stringRef, Dynamic: false},
		{Name: ColumnTimeNanos, StorageLayout: parquet.Timestamp(parquet.Nanosecond), Dynamic: false},
		{Name: ColumnDurationNanos, StorageLayout: parquet.Int(64), Dynamic: false},
		{Name: ColumnPeriod, StorageLayout: parquet.Int(64), Dynamic: false},
		{Name: ColumnPeriodType, StorageLayout: stringRef, Dynamic: false},
		// period_unit is the only dynamic column in this schema.
		{Name: ColumnPeriodUnit, StorageLayout: stringRef, Dynamic: true},
	}

	sorting := []dynparquet.SortingColumn{
		dynparquet.Ascending(ColumnID),
		dynparquet.Ascending(ColumnSampleType),
		dynparquet.Ascending(ColumnSampleUnit),
		dynparquet.Ascending(ColumnPeriodType),
		dynparquet.Ascending(ColumnPeriodUnit),
		dynparquet.NullsFirst(dynparquet.Ascending(ColumnLabels)),
		dynparquet.NullsFirst(dynparquet.Ascending(ColumnLocationIDs)),
		dynparquet.Ascending(ColumnTimeNanos),
		dynparquet.NullsFirst(dynparquet.Ascending(ColumnPprofLabels)),
	}

	return dynparquet.NewSchema("profiles", columns, sorting)
}

0 comments on commit 68da7be

Please sign in to comment.