Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add xml support as xq (from python yq) #215

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
118 changes: 59 additions & 59 deletions builtin.go

Large diffs are not rendered by default.

58 changes: 49 additions & 9 deletions cli/cli.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,14 +4,12 @@ package cli
import (
"errors"
"fmt"
"github.com/itchyny/gojq"
"github.com/mattn/go-isatty"
"io"
"os"
"runtime"
"strings"

"github.com/mattn/go-isatty"

"github.com/itchyny/gojq"
)

const name = "gojq"
Expand Down Expand Up @@ -40,11 +38,21 @@ type cli struct {
outputCompact bool
outputIndent *int
outputTab bool
outputJSON bool
outputXML bool
outputYAML bool
inputRaw bool
inputStream bool
inputJSON bool
inputXML bool
inputYAML bool
inputSlurp bool
stripSpaceXML bool
stripAttrsXML bool
forceListXML []string
htmlXML bool
rootXML string
elementXML string

argnames []string
argvalues []any
Expand All @@ -60,13 +68,22 @@ type flagopts struct {
OutputCompact bool `short:"c" long:"compact-output" description:"output without pretty-printing"`
OutputIndent *int `long:"indent" description:"number of spaces for indentation"`
OutputTab bool `long:"tab" description:"use tabs for indentation"`
OutputYAML bool `long:"yaml-output" description:"output in YAML format"`
OutputYAML bool `short:"y" long:"yaml-output" description:"output in YAML format"`
OutputXML bool `short:"x" long:"xml-output" description:"output in XML format"`
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

(Note: I'm not the author of gojq, just an interested third party.)

I think it makes sense to add even more formats in the future (toml, msgpack, bson, etc), but this list of boolean flags will grow unmanageable.

What do you think about --input-format=xml and --output-format=xml instead of these format-specific parameters?

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, looks to be a very good idea :)

OutputColor bool `short:"C" long:"color-output" description:"output with colors even if piped"`
OutputMono bool `short:"M" long:"monochrome-output" description:"output without colors"`
InputNull bool `short:"n" long:"null-input" description:"use null as input value"`
InputRaw bool `short:"R" long:"raw-input" description:"read input as raw strings"`
InputStream bool `long:"stream" description:"parse input in stream fashion"`
InputYAML bool `long:"yaml-input" description:"read input as YAML format"`
InputJSON bool `short:"J" long:"json-input" description:"read input as JSON format"`
InputXML bool `short:"X" long:"xml-input" description:"read input as XML format"`
StripAttrsXML bool `long:"xml-no-attributes" description:"remove attributes from XML elements"`
StripSpaceXML bool `long:"xml-no-namespaces" description:"remove namespace from XML elements and attributes"`
ForceListXML []string `long:"xml-force-list" description:"force XML elements as array"`
RootXML string `long:"xml-root" description:"root XML element name"`
ElementXML string `long:"xml-element" description:"element XML element name"`
HtmlXML bool `short:"H" long:"xml-html" description:"read input as XML with HTML compatibility mode"`
InputYAML bool `short:"Y" long:"yaml-input" description:"read input as YAML format"`
InputSlurp bool `short:"s" long:"slurp" description:"read all inputs into an array"`
FromFile bool `short:"f" long:"from-file" description:"load query from file"`
ModulePaths []string `short:"L" description:"directory to search modules from"`
Expand Down Expand Up @@ -123,9 +140,9 @@ Usage:
return nil
}
cli.outputRaw, cli.outputRaw0, cli.outputJoin,
cli.outputCompact, cli.outputIndent, cli.outputTab, cli.outputYAML =
cli.outputCompact, cli.outputIndent, cli.outputTab, cli.outputXML, cli.outputYAML =
opts.OutputRaw, opts.OutputRaw0, opts.OutputJoin,
opts.OutputCompact, opts.OutputIndent, opts.OutputTab, opts.OutputYAML
opts.OutputCompact, opts.OutputIndent, opts.OutputTab, opts.OutputXML, opts.OutputYAML
defer func(x bool) { noColor = x }(noColor)
if opts.OutputColor || opts.OutputMono {
noColor = opts.OutputMono
Expand Down Expand Up @@ -154,6 +171,8 @@ Usage:
}
cli.inputRaw, cli.inputStream, cli.inputYAML, cli.inputSlurp =
opts.InputRaw, opts.InputStream, opts.InputYAML, opts.InputSlurp
cli.inputJSON, cli.inputXML, cli.stripAttrsXML, cli.stripSpaceXML, cli.forceListXML, cli.rootXML, cli.elementXML, cli.htmlXML =
opts.InputJSON, opts.InputXML, opts.StripAttrsXML, opts.StripSpaceXML, opts.ForceListXML, opts.RootXML, opts.ElementXML, opts.HtmlXML
for k, v := range opts.Arg {
cli.argnames = append(cli.argnames, "$"+k)
cli.argvalues = append(cli.argvalues, v)
Expand Down Expand Up @@ -300,10 +319,28 @@ func (cli *cli) createInputIter(args []string) (iter inputIter) {
}
case cli.inputStream:
newIter = newStreamInputIter
case cli.inputJSON:
newIter = newJSONInputIter
case cli.inputXML || cli.htmlXML:
newIter = func(r io.Reader, fname string) inputIter {
return newXMLInputIter(r, fname, !cli.stripAttrsXML, !cli.stripSpaceXML, cli.forceListXML, cli.htmlXML)
}
case cli.inputYAML:
newIter = newYAMLInputIter
default:
newIter = newJSONInputIter
// automatically detect between JSON / YAML / XML format
newIter = func(r io.Reader, fname string) inputIter {
rd, f := detectInputType(r, 100)
switch f {
case JsonFormat:
return newJSONInputIter(rd, fname)
case YamlFormat:
return newYAMLInputIter(rd, fname)
case XmlFormat:
return newXMLInputIter(rd, fname, !cli.stripAttrsXML, !cli.stripSpaceXML, cli.forceListXML, cli.htmlXML)
}
return newJSONInputIter(rd, fname)
}
}
if cli.inputSlurp {
defer func() {
Expand Down Expand Up @@ -404,6 +441,9 @@ func (cli *cli) createMarshaler() marshaler {
} else if i := cli.outputIndent; i != nil {
indent = *i
}
if cli.outputXML {
return xmlFormatter(&indent, cli.rootXML, cli.elementXML)
}
f := newEncoder(cli.outputTab, indent)
if cli.outputRaw || cli.outputRaw0 || cli.outputJoin {
return &rawMarshaler{f, cli.outputRaw0}
Expand Down
184 changes: 184 additions & 0 deletions cli/detect.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,184 @@
package cli

import (
"bytes"
"io"
)

// DetectedFormat identifies the input format guessed by detectInputType.
type DetectedFormat int

// Formats that can be detected. JsonFormat is the zero value.
const (
	JsonFormat DetectedFormat = iota
	YamlFormat
	XmlFormat
)

// String returns the lower-case name of the format ("json", "yaml" or
// "xml"), or the empty string for a value that is not a known format.
func (d DetectedFormat) String() string {
	names := [...]string{
		JsonFormat: "json",
		YamlFormat: "yaml",
		XmlFormat:  "xml",
	}
	if d < 0 || int(d) >= len(names) {
		return ""
	}
	return names[d]
}

// detectInputType guesses the format of r by inspecting its first
// significant bytes, without losing any input.
//
// It reads from r in chunks of bufSize bytes (a non-positive bufSize selects
// a default) until a decision can be made. Every chunk consumed while
// sniffing is kept, and the returned reader replays those chunks before
// continuing with the remainder of r, so the caller sees the stream exactly
// as it was.
//
// The heuristics are:
//   - leading spaces, tabs and line breaks are skipped;
//   - '{', '[', '/', a digit or '.' means JSON;
//   - '#' means YAML and '<' means XML;
//   - '-' means YAML when it starts "- " or "---", JSON otherwise;
//   - the words true, false and null, and double-quoted strings, are JSON
//     unless a ':' follows on the same line, which makes them a YAML key;
//   - anything else is YAML.
//
// Input that ends (or fails) before a decision is reached is reported as
// JsonFormat, except in the middle of true/false/null where it is reported
// as YamlFormat.
func detectInputType(r io.Reader, bufSize int) (io.Reader, DetectedFormat) {
	const (
		defaultBufSize = 512
		// maxEmptyReads bounds how often a Read that returns (0, nil) is
		// retried, so a misbehaving reader cannot stall detection forever.
		maxEmptyReads = 100
	)
	// A zero-length buffer can never make progress (and used to panic).
	if bufSize <= 0 {
		bufSize = defaultBufSize
	}

	var (
		consumed []io.Reader // chunks already taken from r, in order
		buf      []byte      // chunk currently being scanned
		pos      int         // index of the next unread byte in buf
		readErr  error       // sticky error reported by r.Read
	)

	// result builds the replay reader: the sniffed chunks followed by
	// whatever is still unread in r.
	result := func(f DetectedFormat) (io.Reader, DetectedFormat) {
		return io.MultiReader(append(consumed, r)...), f
	}

	// readByte returns the next input byte, refilling from r when the
	// current chunk is exhausted.
	readByte := func() (byte, error) {
		if pos == len(buf) {
			if readErr != nil {
				return 0, readErr
			}
			// Each chunk is retained for replay, so a fresh slice is needed.
			chunk := make([]byte, bufSize)
			n := 0
			for tries := 0; n == 0 && readErr == nil; tries++ {
				if tries == maxEmptyReads {
					return 0, io.ErrNoProgress
				}
				// (0, nil) means "nothing happened", not EOF: try again.
				n, readErr = r.Read(chunk)
			}
			if n == 0 {
				return 0, readErr
			}
			// Data returned together with an error is still consumed; the
			// error is surfaced on the next refill.
			buf, pos = chunk[:n], 0
			consumed = append(consumed, bytes.NewReader(buf))
		}
		c := buf[pos]
		pos++
		return c, nil
	}

	// expect reports whether the next bytes of the input are exactly rest.
	expect := func(rest string) bool {
		for i := 0; i < len(rest); i++ {
			c, err := readByte()
			if err != nil || c != rest[i] {
				return false
			}
		}
		return true
	}

	const (
		stateStart  = iota // skipping leading whitespace, nothing decided yet
		stateString        // inside a double-quoted string
		stateAfter         // after a scalar, looking for ':' on the same line
	)
	state := stateStart
	escaped := false // previous byte in stateString was an unescaped '\'

	for {
		b, err := readByte()
		if err != nil {
			return result(JsonFormat)
		}
		switch state {
		case stateStart:
			switch b {
			case ' ', '\t', '\r', '\n':
				// leading whitespace carries no information
			case '{', '[', '/':
				return result(JsonFormat)
			case '#':
				return result(YamlFormat)
			case '<':
				return result(XmlFormat)
			case '-':
				// yaml if "- " or "---", otherwise a (negative) json number
				c, err := readByte()
				if err != nil {
					return result(JsonFormat)
				}
				if c == ' ' {
					return result(YamlFormat)
				}
				if c != '-' {
					return result(JsonFormat)
				}
				c, err = readByte()
				if err != nil || c != '-' {
					return result(JsonFormat)
				}
				return result(YamlFormat)
			case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '.':
				return result(JsonFormat)
			case '"':
				// a quoted string can be a json value or a yaml key
				state = stateString
			case 't':
				// json if true
				if !expect("rue") {
					return result(YamlFormat)
				}
				state = stateAfter
			case 'f':
				// json if false
				if !expect("alse") {
					return result(YamlFormat)
				}
				state = stateAfter
			case 'n':
				// json if null
				if !expect("ull") {
					return result(YamlFormat)
				}
				state = stateAfter
			default:
				// neither a number nor a double-quoted string
				return result(YamlFormat)
			}
		case stateString:
			switch {
			case escaped:
				// the escaped byte is taken literally; the escape covers
				// exactly one byte
				escaped = false
			case b == '\\':
				escaped = true
			case b == '\r' || b == '\n':
				// a line break is not allowed in a yaml key
				return result(JsonFormat)
			case b == '"':
				// closing quote: the next byte tells whether it is a yaml key
				state = stateAfter
			}
		case stateAfter:
			switch b {
			case ' ', '\t':
				// blanks may separate the scalar from a ':'
			case ':':
				// it is a yaml key
				return result(YamlFormat)
			default:
				// a line break or any other byte: not a yaml key
				return result(JsonFormat)
			}
		}
	}
}
56 changes: 56 additions & 0 deletions cli/detect_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
package cli

import (
"bytes"
"io"
"strings"
"testing"
)

func TestDetectInputType(t *testing.T) {
for _, s := range []string{"", "\t", "\r", "\n", " ", " \t", " \r", " \n", " \t ", " \r ", " \n "} {
testDetectInputType(t, s+"", JsonFormat)
testDetectInputType(t, s+"{", JsonFormat)
testDetectInputType(t, s+"#", YamlFormat)
testDetectInputType(t, s+"<", XmlFormat)
testDetectInputType(t, s+"a", YamlFormat)
testDetectInputType(t, s+"a:", YamlFormat)
testDetectInputType(t, s+"a: 1", YamlFormat)
testDetectInputType(t, s+"true", JsonFormat)
testDetectInputType(t, s+"true true", JsonFormat)
testDetectInputType(t, s+"true:", YamlFormat)
testDetectInputType(t, s+"null", JsonFormat)
testDetectInputType(t, s+"null null", JsonFormat)
testDetectInputType(t, s+"null:", YamlFormat)
testDetectInputType(t, s+"false", JsonFormat)
testDetectInputType(t, s+"false false", JsonFormat)
testDetectInputType(t, s+"false:", YamlFormat)
testDetectInputType(t, s+"1", JsonFormat)
testDetectInputType(t, s+"-1", JsonFormat)
testDetectInputType(t, s+"-1e3", JsonFormat)
testDetectInputType(t, s+"- ", YamlFormat)
testDetectInputType(t, s+"--", JsonFormat)
testDetectInputType(t, s+"---", YamlFormat)
testDetectInputType(t, s+`"hello"`, JsonFormat)
testDetectInputType(t, s+`"hello":1`, YamlFormat)
testDetectInputType(t, s+`"hello": 1`, YamlFormat)
testDetectInputType(t, s+`'hello'`, YamlFormat)
testDetectInputType(t, s+`'hello':1`, YamlFormat)
testDetectInputType(t, s+`'hello': 1`, YamlFormat)
}
}

// testDetectInputType runs detectInputType on s with a one-byte buffer and
// fails the test unless the detected format equals format and the returned
// reader yields s unchanged.
func testDetectInputType(t *testing.T, s string, format DetectedFormat) {
	// Attribute failures to the calling line rather than to this helper.
	t.Helper()
	r, f := detectInputType(strings.NewReader(s), 1)
	if f != format {
		// %q keeps whitespace-only inputs readable in the failure output.
		t.Fatalf("failed: invalid format '%s' expected '%s' for string %q", f, format, s)
	}
	buf := new(bytes.Buffer)
	if _, err := io.Copy(buf, r); err != nil {
		t.Fatalf("failed: copy error for string %q: %v", s, err)
	}
	if got := buf.String(); got != s {
		t.Fatalf("failed: invalid reader content %q for string %q", got, s)
	}
}
Loading