Skip to content

Commit

Permalink
Bzng write test (#507)
Browse files Browse the repository at this point in the history
Includes a new method for putting the hex representation of binary data into
a ztest yaml file that also allows comments so the binary data can
be annotated/explained by the test author.

Also fix a few spec issues.

The test is far from comprehensive but it would
at least have caught the bug fixed in PR #377.

Co-Authored-By: Alfred Landrum <alfred@leakybucket.org>
  • Loading branch information
aswan and alfred-landrum committed Apr 3, 2020
1 parent ffa883e commit 3ac86e9
Show file tree
Hide file tree
Showing 3 changed files with 110 additions and 7 deletions.
48 changes: 48 additions & 0 deletions tests/formats/bzng/write.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
zql: '*'

input: |
#0:record[_path:string,ts:time,d:float64]
0:[a;10;1;]
0:[xyz;20;1.5;]
output-format: bzng

outputHex: |
# define a record with 3 columns
80 03
# first column name is _path (len 5)
05 5f 70 61 74 68
# first column type is string (9)
09
# second column name is ts (len 2)
02 74 73
# second column type is time (16 == 0x10)
10
# third column name is d (len 1)
01 64
# third column type is float64 (8)
08
# value using type id 23 (0x17), the record defined above
# total length of this record is 17 bytes (0x11)
17 11
# first column is a primitive value, 2 total bytes
04
# value of the first column is the string "a"
61
# second column is a primitive value, 6 total bytes
0c
# time value is encoded in nanoseconds shifted one bit left
# 20000000000 == 0x04a817c800
00 c8 17 a8 04
# third column is a primitive value, 9 total bytes
12
# 8 bytes of float64 data representing 1.0
00 00 00 00 00 00 f0 3f
# another encoded value using the same record definition as before
17 13
# first column: primitive value of 4 total bytes, value xyz
08 78 79 7a
# second column: primitive value of 20 (converted to nanoseconds, encoded <<1)
0c 00 90 2f 50 09
# third column, primitive value of 9 total bytes, float64 1.5
12 00 00 00 00 00 00 f8 3f
6 changes: 3 additions & 3 deletions zng/docs/spec.md
Original file line number Diff line number Diff line change
Expand Up @@ -315,7 +315,7 @@ is sorted a certain way.
The hint is encoded as follows:
```
---------------------------------------
|0x84|<len>|[+-]<field>,[+-]<field>,...
|0x85|<len>|[+-]<field>,[+-]<field>,...
---------------------------------------
```
where the payload of the message is a length-counted UTF-8 string.
Expand Down Expand Up @@ -379,8 +379,8 @@ is interpreted as follows:
| `enum ` | variable | UTF-8 bytes of enum string |
| `ip` | 4 or 16 | 4 or 16 bytes of IP address |
| `net` | 8 or 32 | 8 or 32 bytes of IP prefix and subnet mask |
| `time` | 8 | 8 bytes of signed nanoseconds from epoch |
| `duration` | 8 | 8 bytes of signed nanoseconds duration |
| `time` | variable | signed nanoseconds since epoch |
| `duration` | variable | signed nanoseconds duration |
| `null` | 0 | No value, always represents an undefined value |

All multi-byte sequences representing machine words are serialized in
Expand Down
63 changes: 59 additions & 4 deletions ztest/ztest.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ package ztest

import (
"bytes"
"encoding/hex"
"errors"
"fmt"
"io"
Expand All @@ -67,6 +68,7 @@ import (
"regexp"
"strings"
"testing"
"unicode/utf8"

"github.com/brimsec/zq/driver"
"github.com/brimsec/zq/emitter"
Expand All @@ -76,6 +78,7 @@ import (
"github.com/brimsec/zq/zng/resolver"
"github.com/brimsec/zq/zql"
"github.com/pmezard/go-difflib/difflib"
"github.com/stretchr/testify/require"
"gopkg.in/yaml.v3"
)

Expand Down Expand Up @@ -128,11 +131,21 @@ func Run(t *testing.T, dirname string) {
} else if zt.errRegex != nil {
t.Fatalf("%s: no error when expecting error regex: %s", filename, zt.ErrorRE)
}
if out != zt.Output {
expectedOut, oerr := zt.getOutput()
require.NoError(t, oerr)
if out != expectedOut {
a := expectedOut
b := out

if !utf8.ValidString(a) {
a = encodeHex(a)
b = encodeHex(b)
}

diff, _ := difflib.GetUnifiedDiffString(difflib.UnifiedDiff{
A: difflib.SplitLines(zt.Output),
A: difflib.SplitLines(a),
FromFile: "expected",
B: difflib.SplitLines(out),
B: difflib.SplitLines(b),
ToFile: "actual",
Context: 5,
})
Expand All @@ -157,7 +170,8 @@ type ZTest struct {
ZQL string `yaml:"zql"`
Input Inputs `yaml:"input"`
OutputFormat string `yaml:"output-format,omitempty"`
Output string `yaml:"output"`
Output string `yaml:"output,omitempty"`
OutputHex string `yaml:"outputHex,omitempty"`
ErrorRE string `yaml:"errorRE"`
errRegex *regexp.Regexp
Warnings string `yaml:"warnings",omitempty"`
Expand All @@ -182,6 +196,47 @@ func (i *Inputs) UnmarshalYAML(value *yaml.Node) error {
return nil
}

// decodeHex decodes a yaml-friendly way of representing binary data in hex.
// Each line of in is either blank, a comment explaining the contents
// (denoted with a leading # character), or a sequence of hex digits
// optionally separated by whitespace. Leading and trailing whitespace on a
// line, including the carriage return of a CRLF line ending, is ignored.
//
// It returns the decoded bytes as a string. If the non-comment text is not
// valid hex (a non-hex character or an odd number of digits), it returns an
// empty string and the error reported by hex.DecodeString.
func decodeHex(in string) (string, error) {
	var raw strings.Builder
	for _, line := range strings.Split(in, "\n") {
		// Trim first so indented comments and "\r\n" endings are
		// recognized instead of being fed to the hex decoder.
		line = strings.TrimSpace(line)
		if line == "" || line[0] == '#' {
			continue
		}
		// Fields splits on any run of whitespace, so tabs and repeated
		// spaces between bytes are accepted as separators.
		for _, digits := range strings.Fields(line) {
			raw.WriteString(digits)
		}
	}
	out, err := hex.DecodeString(raw.String())
	if err != nil {
		return "", err
	}
	return string(out), nil
}

// encodeHex returns a hex dump of in, in the multi-line format produced by
// hex.Dump (offset, hex bytes, and an ASCII column on each line), so that
// binary output can be diffed line by line. An empty input yields an empty
// string.
//
// hex.Dump is used rather than an unclosed hex.Dumper because the Dumper
// only emits the ASCII column and newline of a final, partial line when it
// is closed; without that the tail of the dump would be truncated.
func encodeHex(in string) string {
	return hex.Dump([]byte(in))
}

// getOutput returns the expected output of the test as a raw string.
// The expectation comes from either the Output field (returned as is) or
// the OutputHex field (decoded with decodeHex). Setting both is an error;
// setting neither yields an empty string and no error.
func (z *ZTest) getOutput() (string, error) {
	hasText := len(z.Output) > 0
	hasHex := len(z.OutputHex) > 0
	switch {
	case hasText && hasHex:
		return "", errors.New("Cannot specify both output and outputHex")
	case hasText:
		return z.Output, nil
	case hasHex:
		return decodeHex(z.OutputHex)
	default:
		return "", nil
	}
}

// FromYAMLFile loads a ZTest from the YAML file named filename.
func FromYAMLFile(filename string) (*ZTest, error) {
buf, err := ioutil.ReadFile(filename)
Expand Down

0 comments on commit 3ac86e9

Please sign in to comment.