forked from influxdata/influxdb
-
Notifications
You must be signed in to change notification settings - Fork 0
/
converter.go
153 lines (129 loc) · 3.14 KB
/
converter.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
package main
import (
"fmt"
"math"
"os"
"path/filepath"
"github.com/influxdb/influxdb/tsdb/engine/tsm1"
)
// KeyIterator is the source of data consumed by Converter.Process.
type KeyIterator interface {
// Next is called before every Read; Process stops iterating as soon as it
// returns false.
Next() bool
// Read returns the key and values that Process hands to the TSM writer,
// plus an error. Process aborts and returns that error when it is non-nil.
Read() (string, []tsm1.Value, error)
}
// Converter encapsulates the logic for converting b*1 shards to tsm1 shards.
type Converter struct {
// path is the directory the TSM files are created in.
path string
// maxTSMFileSize is the size above which Process finishes the current TSM
// file and starts a new one.
maxTSMFileSize uint32
// sequence is incremented for every TSM file created and is embedded in
// the file name.
sequence int
// tracker receives the conversion statistics: points read and written,
// TSM file count and bytes, and the number of NaN/Inf values filtered.
tracker *tracker
}
// NewConverter builds a Converter that writes TSM files into the directory
// path, starts a new file once the current one exceeds sz, and reports its
// statistics to t. The file sequence counter starts at zero.
func NewConverter(path string, sz uint32, t *tracker) *Converter {
	conv := new(Converter)
	conv.path = path
	conv.maxTSMFileSize = sz
	conv.tracker = t
	return conv
}
// Process writes the data provided by iter to a tsm1 shard.
//
// It makes sure the output directory exists, then writes every key returned by
// iter, with float64 NaN and Inf values removed, to the current TSM file. Once
// that file's size exceeds maxTSMFileSize its index is written, it is closed,
// and the next key starts a new file. The tracker is updated with the number of
// points read and written and with the size of every finished file.
//
// Process stops at the first error and returns it. A writer that is still open
// at that point is closed so its file is not leaked.
func (c *Converter) Process(iter KeyIterator) error {
	// Ensure the tsm1 directory exists.
	if err := os.MkdirAll(c.path, 0777); err != nil {
		return err
	}

	// w is the writer for the TSM file currently being filled, or nil when
	// no file is open.
	var w tsm1.TSMWriter

	// If we bail out while a writer is still open, close it. Every path that
	// finishes a writer resets w to nil first, so nothing is closed twice.
	defer func() {
		if w != nil {
			// Best effort: the error already being returned is the one
			// worth reporting.
			w.Close()
		}
	}()

	// finish writes the index of the open writer, records its size with the
	// tracker and closes it. The writer is closed even if writing the index
	// fails.
	finish := func() error {
		cur := w
		w = nil
		// ErrNoValues only means nothing was written to this file; it is
		// not a failure of the conversion.
		if err := cur.WriteIndex(); err != nil && err != tsm1.ErrNoValues {
			cur.Close()
			return err
		}
		c.tracker.AddTSMBytes(cur.Size())
		return cur.Close()
	}

	// Iterate until no more data remains.
	for iter.Next() {
		k, v, err := iter.Read()
		if err != nil {
			return err
		}
		scrubbed := c.scrubValues(v)

		// Lazily open a file so that none is created when there is no data.
		if w == nil {
			w, err = c.nextTSMWriter()
			if err != nil {
				return err
			}
		}
		if err := w.Write(k, scrubbed); err != nil {
			return err
		}

		c.tracker.AddPointsRead(len(v))
		c.tracker.AddPointsWritten(len(scrubbed))

		// Once the current file has grown past the limit, finish it; the
		// next key will start a new TSM file.
		if w.Size() > c.maxTSMFileSize {
			if err := finish(); err != nil {
				return err
			}
		}
	}

	// Finish the last, partially filled file.
	if w != nil {
		return finish()
	}
	return nil
}
// nextTSMWriter creates the next TSM file in the converter's directory and
// returns a TSMWriter for it.
//
// Every call increments the converter's sequence number, which is the second
// zero-padded component of the file name; the first component is always 1. The
// tracker's TSM file count is incremented only when the writer was created
// successfully.
func (c *Converter) nextTSMWriter() (tsm1.TSMWriter, error) {
	c.sequence++
	fileName := filepath.Join(c.path, fmt.Sprintf("%09d-%09d.%s", 1, c.sequence, tsm1.TSMFileExtension))

	fd, err := os.OpenFile(fileName, os.O_CREATE|os.O_RDWR, 0666)
	if err != nil {
		return nil, err
	}

	// Create the writer for the new TSM file.
	w, err := tsm1.NewTSMWriter(fd)
	if err != nil {
		// No writer is returned to the caller, so nothing else will close
		// the file: close it here instead of leaking the descriptor. The
		// constructor's error is the one worth reporting, so a failure to
		// close is deliberately ignored.
		fd.Close()
		return nil, err
	}

	c.tracker.IncrTSMFileCount()
	return w, nil
}
// scrubValues takes a slice and removes float64 NaN and Inf. If neither is
// present in the slice, the original slice is returned. This is to avoid
// copying slices unnecessarily.
//
// Values of any other type are always kept. Each NaN or Inf that is removed is
// counted with the tracker. A nil slice yields nil.
//
// When something is removed the result reuses the backing array of values, so
// the contents of values are overwritten in place; callers must use the
// returned slice from then on.
func (c *Converter) scrubValues(values []tsm1.Value) []tsm1.Value {
	if values == nil {
		return nil
	}

	// scrubbed stays nil until the first value is filtered out. Until then
	// nothing is copied.
	var scrubbed []tsm1.Value

	for i, v := range values {
		// Only float64 values can be NaN or Inf; everything else is kept.
		var filter bool
		if f, ok := v.Value().(float64); ok {
			if math.IsNaN(f) {
				filter = true
				c.tracker.IncrNaN()
			}
			if math.IsInf(f, 0) {
				filter = true
				c.tracker.IncrInf()
			}
		}

		switch {
		case filter && scrubbed == nil:
			// Take every value up to the first filtered one, indicating
			// that scrubbed should now be used. values is non-nil here, so
			// this is non-nil even when i is 0.
			scrubbed = values[:i]
		case !filter && scrubbed != nil:
			// We've filtered at least 1 value, so add value to the filtered
			// slice. The write lands at an index below i, so it never
			// clobbers a value that has not been visited yet.
			scrubbed = append(scrubbed, v)
		}
	}

	if scrubbed != nil {
		return scrubbed
	}
	return values
}