-
Notifications
You must be signed in to change notification settings - Fork 0
/
tsdata.go
301 lines (272 loc) · 8.14 KB
/
tsdata.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
// Package tsdata provides tools to manage TSData files. See
// https://github.com/armbrustlab/tsdataformat for a description of TSData
// files.
package tsdata
import (
"fmt"
"strconv"
"strings"
"time"
)
// Constants describing the fixed layout of a TSData file.
const (
	// Delim is the string that separates fields within a line.
	Delim = "\t"
	// NA is the placeholder string written for missing data.
	NA = "NA"
	// HeaderSize is the number of lines that make up a header section.
	HeaderSize = 7
)
// Tsdata defines a TSData file. The exported fields hold the metadata found in
// the file's header section: ParseHeader fills them in from header text,
// ValidateMetadata checks them for consistency, and Header renders them back
// to text. ValidateLine uses the metadata to validate data lines.
type Tsdata struct {
// checkers holds one validation function per column. ParseHeader builds it
// by looking up each value of Types in typecheckers.
checkers []func(string) bool
// lastTime is the time of the most recent line accepted by ValidateLine.
lastTime time.Time
// FileType is the first field of header line 1. Must not be empty.
FileType string
// Project is the first field of header line 2. Must not be empty.
Project string
// FileDescription is the first field of header line 3.
FileDescription string
// Comments holds the per-column comments from header line 4. This line may
// be blank, in which case Comments is empty.
Comments []string
// Types holds the per-column type names from header line 5. Each must be a
// key of typecheckers.
Types []string
// Units holds the per-column units from header line 6.
Units []string
// Headers holds the per-column names from header line 7. The first must be
// "time".
Headers []string
}
// Data holds validated information for one TSData file line, as returned by
// ValidateLine.
type Data struct {
// Fields holds the line's column strings after validation, one per header
// column. Values are whitespace-trimmed, valid time values are rewritten in
// RFC3339 form, and in non-strict mode invalid values are replaced by NA.
Fields []string
// Time is the parsed value of the first (time) column.
Time time.Time
}
// ValidateLine checks values in a data line and returns all fields as a slice
// of strings, together with the parsed time of the first column.
//
// line is split on Delim. Columns beyond the number of Headers are dropped and
// every remaining field has leading/trailing whitespace removed. The first
// column must be a valid timestamp (it may not be NA) and is rewritten as an
// RFC3339 string with nanosecond precision, as is every valid value in any
// other column whose type is "time".
//
// If strict is true an error is returned for the first data field that fails
// validation. If strict is false such fields are replaced with NA instead. An
// invalid first time column is always an error, and the error message quotes
// the value as it appeared in the line.
//
// An error is also returned if the line has fewer than two columns or fewer
// columns than Headers, or if the metadata needed for validation (Headers,
// Types and the checkers built by ParseHeader) has not been populated.
//
// The check that timestamps do not decrease from one line to the next is
// currently disabled. The time of the last validated line is still recorded.
func (t *Tsdata) ValidateLine(line string, strict bool) (Data, error) {
	fields := strings.Split(line, Delim)
	if len(fields) < 2 {
		// Need at least time column plus one data column
		return Data{}, fmt.Errorf("found %v columns, expected >= 2", len(fields))
	}
	ncols := len(t.Headers)
	if len(fields) < ncols {
		return Data{}, fmt.Errorf("found %v columns, expected %v", len(fields), ncols)
	}
	// Without this guard a Tsdata whose header was never (successfully) parsed
	// would panic with an index out of range below.
	if ncols == 0 || len(t.Types) < ncols || len(t.checkers) < ncols {
		return Data{}, fmt.Errorf("header metadata not initialized, call ParseHeader first")
	}
	fields = fields[:ncols] // remove any extra fields

	// Validate first time column separately here to make sure not NA
	fields[0] = strings.TrimSpace(fields[0]) // remove leading/trailing whitespace
	tline, err := parseTime(fields[0])
	if err != nil {
		// fields[0] must still hold the input text here so the message shows
		// the offending value rather than a formatted zero time.
		return Data{}, fmt.Errorf("first time column, bad value '%v'", fields[0])
	}
	fields[0] = tline.Format(time.RFC3339Nano) // standardize time string

	// Turn off time order check for now, it's sometimes too stringent.
	//if tline.Sub(t.lastTime) < 0 {
	//	return Data{}, fmt.Errorf("timestamp less than previous line, %v < %v", tline, t.lastTime)
	//}

	for i := 1; i < len(fields); i++ { // skip first time column
		// Remove leading/trailing whitespace from each data field
		fields[i] = strings.TrimSpace(fields[i])

		if t.Types[i] == "time" {
			// Validate time fields as a special case to avoid parsing twice
			// and to convert to a consistent RFC3339 string with 'T'
			timeField, err := parseTime(fields[i])
			switch {
			case err == nil:
				fields[i] = timeField.Format(time.RFC3339Nano)
			case fields[i] != NA && strict:
				return Data{}, fmt.Errorf("column %v, bad value '%v'", i+1, fields[i])
			default:
				fields[i] = NA
			}
			continue
		}

		check := t.checkers[i]
		if check == nil {
			// Happens when Types holds a name that is not in typecheckers
			return Data{}, fmt.Errorf("column %v, no validator for type '%v'", i+1, t.Types[i])
		}
		if !check(fields[i]) {
			if strict {
				return Data{}, fmt.Errorf("column %v, bad value '%v'", i+1, fields[i])
			}
			fields[i] = NA
		}
	}

	t.lastTime = tline
	return Data{Fields: fields, Time: tline}, nil
}
// ParseHeader parses and validates header metadata. Input should be a string
// of all lines in the file's header section: exactly HeaderSize lines
// separated by "\n", optionally followed by a single trailing "\n".
//
// Trailing spaces, tabs and carriage returns are removed from every line. The
// first Delim-separated field of lines 1 to 3 becomes FileType, Project and
// FileDescription. Lines 4 to 7 are split on Delim into Comments, Types, Units
// and Headers, with each field whitespace-trimmed; a blank line leaves the
// corresponding slice empty. All of these fields are overwritten on every
// call, so values from a previously parsed header never carry over.
//
// It returns an error if the line count is wrong, otherwise the result of
// ValidateMetadata.
func (t *Tsdata) ParseHeader(header string) error {
	header = strings.TrimSuffix(header, "\n")
	headerLines := strings.Split(header, "\n")
	if len(headerLines) != HeaderSize {
		return fmt.Errorf("expected %v lines in header, found %v", HeaderSize, len(headerLines))
	}
	// Remove trailing whitespace from each line
	for i, line := range headerLines {
		headerLines[i] = strings.TrimRight(line, " \t\r")
	}

	t.FileType = strings.Split(headerLines[0], Delim)[0]
	t.Project = strings.Split(headerLines[1], Delim)[0]
	t.FileDescription = strings.Split(headerLines[2], Delim)[0]

	// Assign unconditionally so that reusing a Tsdata does not leave column
	// metadata from an earlier header behind when a line is blank.
	t.Comments = splitHeaderFields(headerLines[3])
	t.Types = splitHeaderFields(headerLines[4])
	t.Units = splitHeaderFields(headerLines[5])
	t.Headers = splitHeaderFields(headerLines[6])

	// Unknown type names get a nil checker here; ValidateMetadata reports them.
	t.checkers = make([]func(string) bool, len(t.Types))
	for i, ty := range t.Types {
		t.checkers[i] = typecheckers[ty]
	}
	return t.ValidateMetadata()
}

// splitHeaderFields splits one header line on Delim and removes
// leading/trailing whitespace from each field. A blank line yields nil.
func splitHeaderFields(line string) []string {
	if line == "" {
		return nil
	}
	fields := strings.Split(line, Delim)
	for i, f := range fields {
		fields[i] = strings.TrimSpace(f)
	}
	return fields
}
// ValidateMetadata checks for errors and inconsistencies in metadata values.
// It returns an error describing the first problem found, or nil.
//
// Checks are made in this order: FileType and Project are non-empty; no
// comment is empty (Comments as a whole may be absent); Types is present,
// agrees with the comment count when there are comments, and contains only
// names known to typecheckers; Units and Headers are present, have one entry
// per type and no empty entries; the first header is "time"; and there is at
// least one data column after the time column.
func (t *Tsdata) ValidateMetadata() error {
	if t.FileType == "" {
		return fmt.Errorf("missing or empty FileType")
	}
	if t.Project == "" {
		return fmt.Errorf("missing or empty Project")
	}

	// Column comments may be a blank line, so zero comments is acceptable
	nComments := len(t.Comments)
	for idx := range t.Comments {
		if t.Comments[idx] == "" {
			return fmt.Errorf("empty comment in column %v", idx+1)
		}
	}

	// Types sets the column count every later line has to match
	nCols := len(t.Types)
	switch {
	case nCols == 0:
		return fmt.Errorf("missing or empty Types")
	case nComments > 0 && nCols != nComments:
		return fmt.Errorf("inconsistent Types column count")
	}
	for idx, typeName := range t.Types {
		if _, known := typecheckers[typeName]; !known {
			return fmt.Errorf("bad Types value '%v' in column %v", typeName, idx+1)
		}
	}

	switch nUnits := len(t.Units); {
	case nUnits == 0:
		return fmt.Errorf("missing or empty Units")
	case nUnits != nCols:
		return fmt.Errorf("inconsistent Units column count")
	}
	for idx, unit := range t.Units {
		if unit == "" {
			return fmt.Errorf("empty Units value in column %v", idx+1)
		}
	}

	switch nHeaders := len(t.Headers); {
	case nHeaders == 0:
		return fmt.Errorf("missing or empty Headers")
	case nHeaders != nCols:
		return fmt.Errorf("inconsistent Headers column count")
	case t.Headers[0] != "time":
		return fmt.Errorf("first Headers column should be 'time'")
	}
	for idx, name := range t.Headers {
		if name == "" {
			return fmt.Errorf("empty Headers value in column %v", idx+1)
		}
	}

	// Finally there must be at least one data column after the first time
	// column
	if nCols < 2 {
		return fmt.Errorf("no data columns after time")
	}
	return nil
}
// Header creates a TSData file metadata header paragraph. The result has
// seven lines joined by "\n": FileType, Project, FileDescription, the column
// comments, Types, Units and Headers, with column values separated by Delim.
// When there are no comments, the comment line holds one NA per header column.
func (t *Tsdata) Header() string {
	// TODO: should this ever produce a non-conforming TSData header?
	var commentLine string
	if len(t.Comments) > 0 {
		commentLine = strings.Join(t.Comments, Delim)
	} else {
		commentLine = nas(len(t.Headers))
	}

	lines := []string{
		t.FileType,
		t.Project,
		t.FileDescription,
		commentLine,
		strings.Join(t.Types, Delim),
		strings.Join(t.Units, Delim),
		strings.Join(t.Headers, Delim),
	}
	// note, the joined text doesn't end with a blank line
	return strings.Join(lines, "\n")
}
// checkTime is the typecheckers entry for "time" columns. It accepts every
// value without inspecting s, so it does not verify that s is a valid
// timestamp; that must be checked separately. ValidateLine does this itself
// by calling parseTime on time columns instead of using this checker.
func checkTime(s string) bool {
return true
}
// checkFloat reports whether s is acceptable in a "float" column: either the
// missing-data marker NA or text that strconv.ParseFloat parses as a 64-bit
// float without error.
func checkFloat(s string) bool {
	if s == NA {
		return true
	}
	_, err := strconv.ParseFloat(s, 64)
	return err == nil
}
// checkInteger reports whether s is acceptable in an "integer" column: either
// the missing-data marker NA or text that strconv.ParseInt parses as a base-10
// 64-bit signed integer without error.
func checkInteger(s string) bool {
	if s == NA {
		return true
	}
	_, err := strconv.ParseInt(s, 10, 64)
	return err == nil
}
// checkText is the typecheckers entry for "text" columns. It accepts every
// value, including the empty string, without inspecting s.
func checkText(s string) bool {
return true
}
// checkCategory reports whether s is acceptable in a "category" column. Any
// non-empty string is accepted.
func checkCategory(s string) bool {
	return len(s) > 0
}
// checkBoolean reports whether s is acceptable in a "boolean" column. Only the
// exact strings "TRUE" and "FALSE" and the missing-data marker NA are
// accepted.
func checkBoolean(s string) bool {
	switch s {
	case "TRUE", "FALSE", NA:
		return true
	default:
		return false
	}
}
// typecheckers maps each supported column type name to the function that
// validates a single field of that type. ValidateMetadata rejects any Types
// value that is not a key of this map, and ParseHeader uses it to build the
// per-column checkers.
var typecheckers = map[string]func(string) bool{
"time": checkTime,
"float": checkFloat,
"integer": checkInteger,
"text": checkText,
"category": checkCategory,
"boolean": checkBoolean,
}
// nas returns a line of size NA values separated by Delim. A size of zero
// yields the empty string.
func nas(size int) string {
	if size == 0 {
		return ""
	}
	// First NA stands alone, every later one is preceded by the delimiter
	return NA + strings.Repeat(Delim+NA, size-1)
}
// parseTime parses s as an RFC3339 timestamp. If that fails it tries again
// with a layout in which the 'T' between date and time is replaced by a
// space, and returns the result of that second attempt.
func parseTime(s string) (t time.Time, err error) {
	if t, err = time.Parse(time.RFC3339Nano, s); err == nil {
		return t, nil
	}
	// Try with a space instead of a T
	spaceLayout := strings.Replace(time.RFC3339, "T", " ", 1)
	return time.Parse(spaceLayout, s)
}