/
block_iterator.go
109 lines (87 loc) · 2.49 KB
/
block_iterator.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
package vparquet
import (
"context"
"fmt"
"io"
"github.com/pkg/errors"
"github.com/segmentio/parquet-go"
tempo_io "github.com/grafana/tempo/pkg/io"
"github.com/grafana/tempo/pkg/parquetquery"
"github.com/grafana/tempo/tempodb/encoding/common"
)
// open prepares the block's data object for reading as parquet.
//
// It wraps a backend ReaderAt for DataFileName in a buffered ReaderAt, opens
// the result as a parquet file sized from the block meta, and builds a row
// reader using the schema derived from Trace. If the parquet file cannot be
// opened, the error is returned as-is and both handles are nil.
func (b *backendBlock) open(ctx context.Context) (*parquet.File, *parquet.Reader, error) {
	backendReader := NewBackendReaderAt(ctx, b.r, DataFileName, b.meta.BlockID, b.meta.TenantID)
	size := int64(b.meta.Size)

	// 2 MiB * 64 = 128 MiB of memory buffering.
	// NOTE(review): assumes the trailing arguments are buffer size and buffer
	// count — confirm against tempo_io.NewBufferedReaderAt.
	buffered := tempo_io.NewBufferedReaderAt(backendReader, size, 2*1024*1024, 64)

	file, err := parquet.OpenFile(buffered, size)
	if err != nil {
		return nil, nil, err
	}

	return file, parquet.NewReader(file, parquet.SchemaOf(&Trace{})), nil
}
// Iterator opens the block and returns an Iterator backed by a blockIterator
// that reads from the freshly created parquet reader. Any error from opening
// the block is returned unchanged.
func (b *backendBlock) Iterator(ctx context.Context) (Iterator, error) {
	_, reader, err := b.open(ctx)
	if err != nil {
		return nil, err
	}

	iter := &blockIterator{
		blockID: b.meta.BlockID.String(),
		r:       reader,
	}
	return iter, nil
}
// RawIterator opens the block and returns an iterator that yields undecoded
// parquet rows together with their trace IDs.
//
// pool supplies the parquet.Row values the iterator reads into. An error is
// returned if the block cannot be opened or if the trace ID column cannot be
// located in the parquet file; in the latter case the reader that was just
// opened is closed before returning, since no iterator will own it.
func (b *backendBlock) RawIterator(ctx context.Context, pool *rowPool) (*rawIterator, error) {
	pf, r, err := b.open(ctx)
	if err != nil {
		return nil, err
	}

	// Only the column index is needed; the second return value is
	// deliberately discarded.
	traceIDIndex, _ := parquetquery.GetColumnIndexByPath(pf, TraceIDColumnName)
	if traceIDIndex < 0 {
		// Best-effort cleanup, mirroring rawIterator.Close: nothing else holds
		// this reader, so release it here rather than dropping it unclosed.
		r.Close()
		return nil, fmt.Errorf("cannot find trace ID column in '%s' in block '%s'", TraceIDColumnName, b.meta.BlockID.String())
	}

	// Keyed fields keep this literal correct even if rawIterator's field
	// order changes.
	return &rawIterator{
		blockID:      b.meta.BlockID.String(),
		r:            r,
		traceIDIndex: traceIDIndex,
		pool:         pool,
	}, nil
}
// blockIterator yields the traces of a single block by decoding them one at a
// time from a parquet reader.
type blockIterator struct {
	// blockID is the string form of the block ID; it is used to give context
	// to errors returned from Next.
	blockID string
	// r is the parquet reader that traces are decoded from.
	r *parquet.Reader
}
// Next decodes the next row of the block into a newly allocated Trace.
//
// It returns (nil, nil) when the reader reports io.EOF, signalling that the
// block is exhausted. Any other read error is wrapped with the block ID.
// The context argument is unused.
func (i *blockIterator) Next(context.Context) (*Trace, error) {
	trace := new(Trace)

	err := i.r.Read(trace)
	if err == nil {
		return trace, nil
	}
	if err == io.EOF {
		// End of block: not an error for the caller.
		return nil, nil
	}
	return nil, errors.Wrapf(err, "error iterating through block %s", i.blockID)
}
// Close is intentionally a no-op; the underlying parquet reader is left open.
func (i *blockIterator) Close() {
	// The parquet reader is shared, so let's not close it here.
	// NOTE(review): confirm who else holds this reader and where it is
	// eventually closed; rawIterator.Close does close its reader.
}
// rawIterator yields undecoded parquet rows of a single block along with the
// trace ID extracted from each row.
//
// NOTE: RawIterator constructs this type with a positional literal, so the
// field order below must not change without updating that call site.
type rawIterator struct {
	// blockID is the string form of the block ID; used in error messages.
	blockID string
	// r is the parquet reader that rows are read from; closed by Close.
	r *parquet.Reader
	// traceIDIndex is the column index that identifies the trace ID value
	// within a row (compared against each value's Column()).
	traceIDIndex int
	// pool supplies the row passed to the reader on every call to Next.
	pool *rowPool
}

// Compile-time check that *rawIterator implements RawIterator.
var _ RawIterator = (*rawIterator)(nil)
// getTraceID scans the values of row r and returns the byte-array payload of
// the first value whose column index equals the iterator's traceIDIndex.
// It returns nil when the row holds no value for that column.
func (i *rawIterator) getTraceID(r parquet.Row) common.ID {
	for idx := range r {
		if value := r[idx]; value.Column() == i.traceIDIndex {
			return value.ByteArray()
		}
	}
	return nil
}
// Next reads a single row from the block into a row taken from the pool and
// returns it together with its trace ID.
//
// When a row was read it is returned with a nil error, even if the reader
// also reported an error on the same call. When no row was read and the
// reader reported io.EOF, (nil, nil, nil) is returned to signal the end of
// the block. Otherwise the reader's error is wrapped with the block ID.
// The context argument is unused.
//
// NOTE(review): if the reader returns zero rows with a nil error, the wrap
// of a nil error presumably yields nil (pkg/errors documents Wrapf(nil) as
// nil), making that case indistinguishable from end of block — confirm this
// is intended.
// NOTE(review): on the paths where no row is read, the row obtained from the
// pool is not handed back here — verify whether it should be.
func (i *rawIterator) Next(context.Context) (common.ID, parquet.Row, error) {
	buf := make([]parquet.Row, 1)
	buf[0] = i.pool.Get()

	n, err := i.r.ReadRows(buf)
	switch {
	case n > 0:
		row := buf[0]
		return i.getTraceID(row), row, nil
	case err == io.EOF:
		return nil, nil, nil
	default:
		return nil, nil, errors.Wrapf(err, "error iterating through block %s", i.blockID)
	}
}
// Close closes the underlying parquet reader. Anything the reader's Close
// returns is discarded, so callers are not informed of close failures.
func (i *rawIterator) Close() {
	i.r.Close()
}