Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
1 parent
91a5511
commit 68da7be
Showing
3 changed files
with
294 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package schema | ||
|
||
import ( | ||
"testing" | ||
|
||
"github.com/grafana/fire/pkg/firedb" | ||
v1 "github.com/grafana/fire/pkg/firedb/schemas/v1" | ||
"github.com/segmentio/parquet-go" | ||
"github.com/stretchr/testify/require" | ||
) | ||
|
||
func TestSchema(t *testing.T) { | ||
|
||
originalSchema := parquet.SchemaOf(&firedb.Profile{}) | ||
|
||
v1Schema := v1.ProfilesSchema() | ||
require.Equal(t, originalSchema.String(), v1Schema.String()) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,162 @@ | ||
package v1 | ||
|
||
import ( | ||
"reflect" | ||
"strings" | ||
"unicode" | ||
"unicode/utf8" | ||
|
||
"github.com/segmentio/parquet-go" | ||
"github.com/segmentio/parquet-go/compress" | ||
"github.com/segmentio/parquet-go/deprecated" | ||
"github.com/segmentio/parquet-go/encoding" | ||
"github.com/segmentio/parquet-go/format" | ||
) | ||
|
||
type Group []*groupField | ||
|
||
func (g Group) String() string { | ||
s := new(strings.Builder) | ||
parquet.PrintSchema(s, "", g) | ||
return s.String() | ||
} | ||
|
||
func (g Group) Type() parquet.Type { return &groupType{} } | ||
|
||
func (g Group) Optional() bool { return false } | ||
|
||
func (g Group) Repeated() bool { return false } | ||
|
||
func (g Group) Required() bool { return true } | ||
|
||
func (g Group) Leaf() bool { return false } | ||
|
||
func (g Group) Fields() []parquet.Field { | ||
fields := make([]parquet.Field, len(g)) | ||
for pos := range g { | ||
fields[pos] = g[pos] | ||
} | ||
return fields | ||
} | ||
|
||
func (g Group) Encoding() encoding.Encoding { return nil } | ||
|
||
func (g Group) Compression() compress.Codec { return nil } | ||
|
||
func (g Group) GoType() reflect.Type { return goTypeOfGroup(g) } | ||
|
||
// exportedStructFieldName returns name with its first rune upper-cased, so the
// result can serve as an exported Go struct field name.
//
// An empty name, or a name whose first byte is not valid UTF-8, is returned
// unchanged instead of having U+FFFD substituted for the missing/invalid rune.
func exportedStructFieldName(name string) string {
	firstRune, size := utf8.DecodeRuneInString(name)
	// DecodeRuneInString yields (RuneError, 0) for "" and (RuneError, 1) for an
	// invalid encoding; a genuine U+FFFD in the input decodes with size 3 and
	// falls through to the normal path.
	if firstRune == utf8.RuneError && size <= 1 {
		return name
	}
	return string(unicode.ToUpper(firstRune)) + name[size:]
}
|
||
func goTypeOfGroup(node parquet.Node) reflect.Type { | ||
fields := node.Fields() | ||
structFields := make([]reflect.StructField, len(fields)) | ||
for i, field := range fields { | ||
structFields[i].Name = exportedStructFieldName(field.Name()) | ||
structFields[i].Type = field.GoType() | ||
// TODO: can we reconstruct a struct tag that would be valid if a value | ||
// of this type were passed to SchemaOf? | ||
} | ||
return reflect.StructOf(structFields) | ||
} | ||
|
||
// groupField is a parquet.Node paired with the name it carries as an element of a Group. | ||
type groupField struct { | ||
// Node is embedded, so a groupField exposes the wrapped node's methods directly. | ||
parquet.Node | ||
// name is the value returned by the Name method. | ||
name string | ||
} | ||
|
||
type groupType struct{} | ||
|
||
func (groupType) String() string { return "group" } | ||
|
||
func (groupType) Kind() parquet.Kind { | ||
panic("cannot call Kind on parquet group") | ||
} | ||
|
||
func (groupType) Compare(parquet.Value, parquet.Value) int { | ||
panic("cannot compare values on parquet group") | ||
} | ||
|
||
func (groupType) NewColumnIndexer(int) parquet.ColumnIndexer { | ||
panic("cannot create column indexer from parquet group") | ||
} | ||
|
||
func (groupType) NewDictionary(int, int, []byte) parquet.Dictionary { | ||
panic("cannot create dictionary from parquet group") | ||
} | ||
|
||
func (t groupType) NewColumnBuffer(int, int) parquet.ColumnBuffer { | ||
panic("cannot create column buffer from parquet group") | ||
} | ||
|
||
func (t groupType) NewPage(int, int, []byte) parquet.Page { | ||
panic("cannot create page from parquet group") | ||
} | ||
|
||
func (groupType) Encode(_, _ []byte, _ encoding.Encoding) ([]byte, error) { | ||
panic("cannot encode parquet group") | ||
} | ||
|
||
func (groupType) Decode(_, _ []byte, _ encoding.Encoding) ([]byte, error) { | ||
panic("cannot decode parquet group") | ||
} | ||
|
||
func (groupType) Length() int { return 0 } | ||
|
||
func (groupType) EstimateSize(int) int64 { return 0 } | ||
|
||
func (groupType) ColumnOrder() *format.ColumnOrder { return nil } | ||
|
||
func (groupType) PhysicalType() *format.Type { return nil } | ||
|
||
func (groupType) LogicalType() *format.LogicalType { return nil } | ||
|
||
func (groupType) ConvertedType() *deprecated.ConvertedType { return nil } | ||
|
||
func (f *groupField) Name() string { return f.name } | ||
|
||
func (f *groupField) Value(base reflect.Value) reflect.Value { | ||
return base.MapIndex(reflect.ValueOf(&f.name).Elem()) | ||
} | ||
|
||
func ProfilesSchema() *parquet.Schema { | ||
stringRef := parquet.Encoded(parquet.Int(64), &parquet.DeltaBinaryPacked) | ||
sampleType := parquet.Group{ | ||
"Type": stringRef, | ||
"Unit": stringRef, | ||
} | ||
|
||
externalLabels := parquet.Repeated(Group{ | ||
{name: "Name", Node: stringRef}, | ||
{name: "Value", Node: stringRef}, | ||
}) | ||
|
||
pprofLabels := parquet.Repeated(Group{ | ||
{name: "Key", Node: stringRef}, | ||
{name: "Str", Node: parquet.Optional(stringRef)}, | ||
{name: "Num", Node: parquet.Optional(parquet.Int(64))}, | ||
{name: "NumUnit", Node: parquet.Optional(stringRef)}, | ||
}) | ||
|
||
s := parquet.NewSchema("Profile", Group{ | ||
{name: "ID", Node: parquet.UUID()}, | ||
{name: "ExternalLabels", Node: externalLabels}, | ||
{name: "Types", Node: parquet.Repeated(sampleType)}, | ||
{name: "Samples", Node: parquet.Repeated(Group{ | ||
{name: "LocationIds", Node: parquet.Repeated(parquet.Uint(64))}, | ||
{name: "Values", Node: parquet.Repeated(parquet.Encoded(parquet.Int(64), &parquet.DeltaBinaryPacked))}, | ||
{name: "Labels", Node: pprofLabels}, | ||
})}, | ||
{name: "DropFrames", Node: stringRef}, | ||
{name: "KeepFrames", Node: stringRef}, | ||
{name: "TimeNanos", Node: parquet.Timestamp(parquet.Nanosecond)}, | ||
{name: "DurationNanos", Node: parquet.Int(64)}, | ||
{name: "PeriodType", Node: parquet.Optional(sampleType)}, | ||
{name: "Period", Node: parquet.Int(64)}, | ||
{name: "Comments", Node: parquet.Repeated(stringRef)}, | ||
{name: "DefaultSampleType", Node: parquet.Int(64)}, | ||
}) | ||
return s | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,114 @@ | ||
package v2 | ||
|
||
import ( | ||
"github.com/polarsignals/frostdb/dynparquet" | ||
"github.com/segmentio/parquet-go" | ||
) | ||
|
||
// Column names for the v2 profiles schema; Profiles uses them as column definition names and sorting keys. | ||
// NOTE(review): ColumnComments is declared here but no column definition in Profiles uses it — confirm whether that is intentional. | ||
const ( | ||
ColumnID = "id" | ||
ColumnLabels = "labels" | ||
ColumnSampleType = "sample_type" | ||
ColumnSampleUnit = "sample_unit" | ||
ColumnLocationIDs = "location_ids" | ||
ColumnSamples = "samples" | ||
ColumnPprofLabels = "pprof_labels" | ||
ColumnDropFrames = "drop_frames" | ||
ColumnKeepFrames = "keep_frames" | ||
ColumnTimeNanos = "time_nanos" | ||
ColumnDurationNanos = "duration_nanos" | ||
ColumnPeriod = "period" | ||
ColumnPeriodType = "period_type" | ||
ColumnPeriodUnit = "period_unit" | ||
ColumnComments = "comments" | ||
) | ||
|
||
func Profiles() *dynparquet.Schema { | ||
stringRef := parquet.Encoded(parquet.Int(64), &parquet.DeltaBinaryPacked) | ||
|
||
labels := parquet.Repeated(parquet.Group{ | ||
"Name": stringRef, | ||
"Value": stringRef, | ||
}) | ||
|
||
pprofLabels := parquet.Repeated(parquet.Group{ | ||
"Key": stringRef, | ||
"Str": parquet.Optional(stringRef), | ||
"Num": parquet.Optional(parquet.Int(64)), | ||
"NumUnit": parquet.Optional(stringRef), | ||
}) | ||
|
||
return dynparquet.NewSchema( | ||
"profiles", | ||
[]dynparquet.ColumnDefinition{ | ||
{ | ||
Name: ColumnID, | ||
StorageLayout: parquet.Int(64), | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnLabels, | ||
StorageLayout: labels, | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnSampleType, | ||
StorageLayout: stringRef, | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnSampleUnit, | ||
StorageLayout: stringRef, | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnLocationIDs, | ||
StorageLayout: parquet.Repeated(parquet.Repeated(parquet.Uint(64))), | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnSamples, | ||
StorageLayout: parquet.Repeated(parquet.Encoded(parquet.Int(64), &parquet.DeltaBinaryPacked)), | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnPprofLabels, | ||
StorageLayout: pprofLabels, | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnDropFrames, | ||
StorageLayout: stringRef, | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnKeepFrames, | ||
StorageLayout: stringRef, | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnTimeNanos, | ||
StorageLayout: parquet.Timestamp(parquet.Nanosecond), | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnDurationNanos, | ||
StorageLayout: parquet.Int(64), | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnPeriod, | ||
StorageLayout: parquet.Int(64), | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnPeriodType, | ||
StorageLayout: stringRef, | ||
Dynamic: false, | ||
}, { | ||
Name: ColumnPeriodUnit, | ||
StorageLayout: stringRef, | ||
Dynamic: true, | ||
}, | ||
}, | ||
[]dynparquet.SortingColumn{ | ||
dynparquet.Ascending(ColumnID), | ||
dynparquet.Ascending(ColumnSampleType), | ||
dynparquet.Ascending(ColumnSampleUnit), | ||
dynparquet.Ascending(ColumnPeriodType), | ||
dynparquet.Ascending(ColumnPeriodUnit), | ||
dynparquet.NullsFirst(dynparquet.Ascending(ColumnLabels)), | ||
dynparquet.NullsFirst(dynparquet.Ascending(ColumnLocationIDs)), | ||
dynparquet.Ascending(ColumnTimeNanos), | ||
dynparquet.NullsFirst(dynparquet.Ascending(ColumnPprofLabels)), | ||
}, | ||
) | ||
} |