From 97de1f053d222897dd8a047a6325a41abaf52741 Mon Sep 17 00:00:00 2001 From: jf-tech Date: Sat, 5 Sep 2020 09:19:35 +1200 Subject: [PATCH] Complete omniv2.transform code: all the parse???? methods; add a few more custom funcs. --- omniparser/customfuncs/customFuncs.go | 100 +- omniparser/customfuncs/customFuncs_test.go | 396 +++++++ omniparser/parser.go | 12 +- omniparser/schemaplugin/header_test.go | 10 +- .../omni/v2/transform/decl_test.go | 4 +- .../omni/v2/transform/invokeCustomFunc.go | 121 +++ .../v2/transform/invokeCustomFunc_test.go | 251 +++++ .../omni/v2/transform/nodeToObject.go | 115 +++ .../omni/v2/transform/nodeToObject_test.go | 151 +++ .../schemaplugin/omni/v2/transform/parse.go | 345 +++++++ .../omni/v2/transform/parse_test.go | 966 ++++++++++++++++++ .../omni/v2/transform/validate_test.go | 21 +- omniparser/transformctx/ctx.go | 12 + omniparser/transformctx/ctx_test.go | 53 + strs/strs.go | 23 + strs/strs_test.go | 50 +- testlib/testlib.go | 6 - testlib/testlib_test.go | 6 - 18 files changed, 2600 insertions(+), 42 deletions(-) create mode 100644 omniparser/schemaplugin/omni/v2/transform/invokeCustomFunc.go create mode 100644 omniparser/schemaplugin/omni/v2/transform/invokeCustomFunc_test.go create mode 100644 omniparser/schemaplugin/omni/v2/transform/nodeToObject.go create mode 100644 omniparser/schemaplugin/omni/v2/transform/nodeToObject_test.go create mode 100644 omniparser/schemaplugin/omni/v2/transform/parse.go create mode 100644 omniparser/schemaplugin/omni/v2/transform/parse_test.go create mode 100644 omniparser/transformctx/ctx_test.go diff --git a/omniparser/customfuncs/customFuncs.go b/omniparser/customfuncs/customFuncs.go index 79e0057..36fe186 100644 --- a/omniparser/customfuncs/customFuncs.go +++ b/omniparser/customfuncs/customFuncs.go @@ -2,8 +2,13 @@ package customfuncs import ( "bytes" + "encoding/json" + "fmt" + "strconv" + "strings" "github.com/jf-tech/omniparser/omniparser/transformctx" + "github.com/jf-tech/omniparser/strs" ) 
// CustomFuncType is the type of a custom function. Has to use interface{} given we support @@ -16,7 +21,23 @@ type CustomFuncs = map[string]CustomFuncType // BuiltinCustomFuncs contains all the built-in custom functions. var BuiltinCustomFuncs = map[string]CustomFuncType{ // keep these custom funcs lexically sorted - "concat": concat, + "concat": concat, + "external": external, + "lower": lower, + "splitIntoJsonArray": splitIntoJsonArray, + "substring": substring, + "upper": upper, +} + +// Merge merges multiple custom func maps into one. +func Merge(funcs ...CustomFuncs) CustomFuncs { + merged := make(CustomFuncs) + for _, fs := range funcs { + for name, f := range fs { + merged[name] = f + } + } + return merged } func concat(_ *transformctx.Ctx, strs ...string) (string, error) { @@ -26,3 +47,80 @@ func concat(_ *transformctx.Ctx, strs ...string) (string, error) { } return b.String(), nil } + +func external(ctx *transformctx.Ctx, name string) (string, error) { + if v, found := ctx.ExternalProperty(name); found { + return v, nil + } + return "", fmt.Errorf("cannot find external property '%s'", name) +} + +func lower(_ *transformctx.Ctx, s string) (string, error) { + return strings.ToLower(s), nil +} + +// splitIntoJsonArray splits `s` into substrings separated by `sep` and returns a string array represented in json. +// `trim` indicates whether each of the separated substrings will be space-trimmed or not. If `trim` +// is "", it defaults to "false". +// e.g. s = "a,b, c", sep = ",", trim = "", result will be `["a","b"," c"]`. +// e.g. s = "a,b, c", sep = ",", trim = "true", result will be `["a","b","c"]`. 
+func splitIntoJsonArray(_ *transformctx.Ctx, s, sep string, trim string) (string, error) { + if sep == "" { + return "", fmt.Errorf("'sep' can't be empty") + } + if s == "" { + return "[]", nil + } + toTrim := false + var err error + if strs.IsStrNonBlank(trim) { + toTrim, err = strconv.ParseBool(strings.TrimSpace(trim)) + if err != nil { + return "", fmt.Errorf( + `'trim' must be either "" (default to "false"") or "true" or "false". err: %s`, err.Error()) + } + } + splits := strings.Split(s, sep) + if toTrim { + splits = strs.NoErrMapSlice(splits, func(s string) string { + return strings.TrimSpace(s) + }) + } + // strings.Split always returns a valid non-nil slice (could be empty), thus json marshaling + // will always succeed. + b, _ := json.Marshal(splits) + return string(b), nil +} + +func substring(_ *transformctx.Ctx, str, startIndex, lengthStr string) (string, error) { + start, err := strconv.Atoi(startIndex) + if err != nil { + return "", fmt.Errorf("unable to convert start index '%s' into int, err: %s", startIndex, err.Error()) + } + length, err := strconv.Atoi(lengthStr) + if err != nil { + return "", fmt.Errorf("unable to convert length '%s' into int, err: %s", lengthStr, err.Error()) + } + if length < -1 { + return "", fmt.Errorf("length must be >= -1, but got %d", length) + } + // We can/do deal with UTF-8 encoded strings. startIndex and length are all about + // UTF-8 characters not just bytes. 
+ runes := []rune(str) + runeLen := len(runes) + if start < 0 || start > runeLen { + return "", fmt.Errorf("start index %d is out of bounds (string length is %d)", start, runeLen) + } + if length == -1 { + length = runeLen - start + } + if start+length > runeLen { + return "", fmt.Errorf( + "start %d + length %d is out of bounds (string length is %d)", start, length, runeLen) + } + return string(runes[start : start+length]), nil +} + +func upper(_ *transformctx.Ctx, s string) (string, error) { + return strings.ToUpper(s), nil +} diff --git a/omniparser/customfuncs/customFuncs_test.go b/omniparser/customfuncs/customFuncs_test.go index 67cc905..b7aec7f 100644 --- a/omniparser/customfuncs/customFuncs_test.go +++ b/omniparser/customfuncs/customFuncs_test.go @@ -4,8 +4,29 @@ import ( "testing" "github.com/stretchr/testify/assert" + + "github.com/jf-tech/omniparser/omniparser/transformctx" + "github.com/jf-tech/omniparser/strs" ) +func TestMerge(t *testing.T) { + fs1 := CustomFuncs{ + "a": 1, + "b": 2, + } + fs2 := CustomFuncs{ + "a": 3, + "c": 4, + } + assert.Equal(t, + CustomFuncs{ + "a": 3, + "b": 2, + "c": 4, + }, + Merge(fs1, fs2, nil)) +} + func TestConcat(t *testing.T) { for _, test := range []struct { name string @@ -40,3 +61,378 @@ func TestConcat(t *testing.T) { }) } } + +func TestExternal(t *testing.T) { + for _, test := range []struct { + name string + externalProperties map[string]string + propNameToLookUp string + expectedErr string + expectedValue string + }{ + { + name: "externalProperties nil", + externalProperties: nil, + propNameToLookUp: "abc", + expectedErr: "cannot find external property 'abc'", + expectedValue: "", + }, + { + name: "externalProperties empty", + externalProperties: map[string]string{}, + propNameToLookUp: "efg", + expectedErr: "cannot find external property 'efg'", + expectedValue: "", + }, + { + name: "can't find prop", + externalProperties: map[string]string{"abc": "abc"}, + propNameToLookUp: "efg", + expectedErr: "cannot find 
external property 'efg'", + expectedValue: "", + }, + { + name: "found", + externalProperties: map[string]string{"abc": "123"}, + propNameToLookUp: "abc", + expectedErr: "", + expectedValue: "123", + }, + } { + t.Run(test.name, func(t *testing.T) { + v, err := external( + &transformctx.Ctx{ExternalProperties: test.externalProperties}, + test.propNameToLookUp, + ) + switch { + case strs.IsStrNonBlank(test.expectedErr): + assert.Error(t, err) + assert.Equal(t, test.expectedErr, err.Error()) + assert.Equal(t, "", v) + default: + assert.NoError(t, err) + assert.Equal(t, test.expectedValue, v) + } + }) + } +} + +func TestLower(t *testing.T) { + s, err := lower(nil, "") + assert.NoError(t, err) + assert.Equal(t, "", s) + + s, err = lower(nil, "AbCeDfG 0123456789") + assert.NoError(t, err) + assert.Equal(t, "abcedfg 0123456789", s) +} + +func TestSplitIntoJsonArray(t *testing.T) { + // success cases + for _, test := range []struct { + name string + s string + sep string + trim string + expected string + expectedErr string + }{ + { + name: "both empty", + s: "", + sep: "", + trim: "", + expected: "", + expectedErr: `'sep' can't be empty`, + }, + { + name: "s with several spaces and sep empty", + s: " ", + sep: "", + trim: "true", + expected: "", + expectedErr: `'sep' can't be empty`, + }, + { + name: "s with several spaces and sep space", + s: " ", + sep: " ", + trim: "true", + expected: `["","",""]`, + expectedErr: "", + }, + { + name: "s with several spaces and sep non-space", + s: " ", + sep: ",", + trim: "true", + expected: `[""]`, + expectedErr: "", + }, + { + name: "s empty", + s: "", + sep: ",", + trim: "true", + expected: "[]", + expectedErr: "", + }, + { + name: "sep empty", + s: "ab c", + sep: "", + trim: "true", + expected: "", + expectedErr: `'sep' can't be empty`, + }, + { + name: "both not empty; no trim", + s: "a>b> c", + sep: ">", + trim: "", + expected: `["a","b"," c"]`, // reason is: if sep is empty, s is split into each char. 
+ expectedErr: "", + }, + { + name: "both not empty; trim", + s: "a>b> c", + sep: ">", + trim: "true", + expected: `["a","b","c"]`, // reason is: if sep is empty, s is split into each char. + expectedErr: "", + }, + } { + t.Run(test.name, func(t *testing.T) { + result, err := splitIntoJsonArray(nil, test.s, test.sep, test.trim) + if test.expectedErr != "" { + assert.Error(t, err) + assert.Equal(t, test.expectedErr, err.Error()) + assert.Equal(t, "", result) + } else { + assert.NoError(t, err) + assert.Equal(t, test.expected, result) + } + }) + } + + // failure case + t.Run("invalid trim", func(t *testing.T) { + result, err := splitIntoJsonArray(nil, "a,b,c", ",", "invalid") + assert.Error(t, err) + assert.Equal(t, + `'trim' must be either "" (default to "false"") or "true" or "false". err: strconv.ParseBool: parsing "invalid": invalid syntax`, + err.Error()) + assert.Equal(t, "", result) + }) +} + +func TestSubstring(t *testing.T) { + tests := []struct { + name string + str string + startIndex string + lengthStr string + expected string + expectedErr string + }{ + { + name: "invalid startIndex", + str: "123456", + startIndex: "abc", + lengthStr: "5", + expected: "", + expectedErr: `unable to convert start index 'abc' into int, err: strconv.Atoi: parsing "abc": invalid syntax`, + }, + { + name: "invalid lengthStr", + str: "123456", + startIndex: "5", + lengthStr: "abc", + expected: "", + expectedErr: `unable to convert length 'abc' into int, err: strconv.Atoi: parsing "abc": invalid syntax`, + }, + { + name: "empty startIndex", + str: "123456", + startIndex: "", + lengthStr: "5", + expected: "", + expectedErr: `unable to convert start index '' into int, err: strconv.Atoi: parsing "": invalid syntax`, + }, + { + name: "empty lengthStr", + str: "123456", + startIndex: "5", + lengthStr: "", + expected: "", + expectedErr: `unable to convert length '' into int, err: strconv.Atoi: parsing "": invalid syntax`, + }, + { + name: "empty str", + str: "", + startIndex: "0", + 
lengthStr: "0", + expected: "", + expectedErr: "", + }, + { + name: "empty str with non-0 startIndex", + str: "", + startIndex: "1", + lengthStr: "0", + expected: "", + expectedErr: `start index 1 is out of bounds (string length is 0)`, + }, + { + name: "empty str with non-0 lengthStr", + str: "", + startIndex: "0", + lengthStr: "1", + expected: "", + expectedErr: `start 0 + length 1 is out of bounds (string length is 0)`, + }, + { + name: "0 startIndex", + str: "123456", + startIndex: "0", + lengthStr: "4", + expected: "1234", + expectedErr: "", + }, + { + name: "lengthStr is 1", + str: "123456", + startIndex: "4", + lengthStr: "1", + expected: "5", + expectedErr: "", + }, + { + name: "lengthStr is 0", + str: "123456", + startIndex: "1", + lengthStr: "0", + expected: "", + expectedErr: "", + }, + { + name: "lengthStr is -1", + str: "123456", + startIndex: "3", + lengthStr: "-1", + expected: "456", + expectedErr: "", + }, + { + name: "negative startIndex", + str: "123456", + startIndex: "-4", + lengthStr: "4", + expected: "", + expectedErr: `start index -4 is out of bounds (string length is 6)`, + }, + { + name: "negative lengthStr other than -1", + str: "123456", + startIndex: "4", + lengthStr: "-2", + expected: "", + expectedErr: `length must be >= -1, but got -2`, + }, + { + name: "out-of-bounds startIndex", + str: "123456", + startIndex: "9", + lengthStr: "2", + expected: "", + expectedErr: `start index 9 is out of bounds (string length is 6)`, + }, + { + name: "out-of-bounds lengthStr", + str: "123456", + startIndex: "2", + lengthStr: "7", + expected: "", + expectedErr: `start 2 + length 7 is out of bounds (string length is 6)`, + }, + { + name: "out-of-bounds startIndex and lengthStr", + str: "123456", + startIndex: "10", + lengthStr: "9", + expected: "", + expectedErr: `start index 10 is out of bounds (string length is 6)`, + }, + { + name: "substring starts at the beginning", + str: "123456", + startIndex: "0", + lengthStr: "4", + expected: "1234", + 
expectedErr: "", + }, + { + name: "substring ends at the end", + str: "123456", + startIndex: "2", + lengthStr: "4", + expected: "3456", + expectedErr: "", + }, + { + name: "substring starts at the end", + str: "123456", + startIndex: "6", + lengthStr: "0", + expected: "", + expectedErr: "", + }, + { + name: "substring ends at the beginning", + str: "123456", + startIndex: "0", + lengthStr: "0", + expected: "", + expectedErr: "", + }, + { + name: "substring is the whole string", + str: "123456", + startIndex: "0", + lengthStr: "6", + expected: "123456", + expectedErr: "", + }, + { + name: "non-ASCII string", + str: "ü:ü", + startIndex: "1", + lengthStr: "2", + expected: ":ü", + expectedErr: "", + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + result, err := substring(nil, test.str, test.startIndex, test.lengthStr) + if test.expectedErr == "" { + assert.NoError(t, err) + assert.Equal(t, test.expected, result) + } else { + assert.Error(t, err) + assert.Equal(t, test.expectedErr, err.Error()) + assert.Equal(t, "", result) + } + }) + } +} + +func TestUpper(t *testing.T) { + s, err := upper(nil, "") + assert.NoError(t, err) + assert.Equal(t, "", s) + + s, err = upper(nil, "abCeDfG 0123456789") + assert.NoError(t, err) + assert.Equal(t, "ABCEDFG 0123456789", s) +} diff --git a/omniparser/parser.go b/omniparser/parser.go index 23a7a73..c4ceeaa 100644 --- a/omniparser/parser.go +++ b/omniparser/parser.go @@ -100,17 +100,15 @@ func NewParser(schemaName string, schemaReader io.Reader, exts ...Extension) (Pa } func collectCustomFuncs(exts []Extension) customfuncs.CustomFuncs { - funcs := make(customfuncs.CustomFuncs) + var funcs customfuncs.CustomFuncs for _, ext := range exts { if ext.CustomFuncs == nil { continue } - for name, f := range ext.CustomFuncs { - // This does mean any 3rd party extension custom funcs name-collide with - // builtin custom funcs, they will be overwritten by builtin ones (because - // argument exts always put builtin 
exts at last), which makes sense. :) - funcs[name] = f - } + // This does mean if any 3rd party extension custom funcs name-collide with + // builtin custom funcs, they will be overwritten by builtin ones (because + // argument exts always have builtin exts at last), which makes sense. :) + funcs = customfuncs.Merge(funcs, ext.CustomFuncs) } return funcs } diff --git a/omniparser/schemaplugin/header_test.go b/omniparser/schemaplugin/header_test.go index 113eb4d..97d2b87 100644 --- a/omniparser/schemaplugin/header_test.go +++ b/omniparser/schemaplugin/header_test.go @@ -13,7 +13,7 @@ import ( "golang.org/x/text/encoding/charmap" "github.com/jf-tech/omniparser/jsons" - "github.com/jf-tech/omniparser/testlib" + "github.com/jf-tech/omniparser/strs" ) func TestSupportedEncodingMappingsDump(t *testing.T) { @@ -45,18 +45,18 @@ func TestWrapEncoding(t *testing.T) { assert.Equal(t, "test", readAll(ParserSettings{}.WrapEncoding(strings.NewReader("test")))) // 'parser_settings.encoding' = UTF-8 assert.Equal(t, "test", readAll( - ParserSettings{Encoding: testlib.StrPtr(encodingUTF8)}.WrapEncoding(strings.NewReader("test")))) + ParserSettings{Encoding: strs.StrPtr(encodingUTF8)}.WrapEncoding(strings.NewReader("test")))) // 'parser_settings.encoding' = ==> UTF-8 assert.Equal(t, "test", readAll( - ParserSettings{Encoding: testlib.StrPtr("unknown")}.WrapEncoding(strings.NewReader("test")))) + ParserSettings{Encoding: strs.StrPtr("unknown")}.WrapEncoding(strings.NewReader("test")))) // 'parser_settings.encoding' = ISO-8859-1 iso88591bytes, err := charmap.ISO8859_1.NewEncoder().Bytes([]byte("test")) assert.NoError(t, err) assert.Equal(t, "test", readAll( - ParserSettings{Encoding: testlib.StrPtr(encodingISO8859_1)}.WrapEncoding(bytes.NewReader(iso88591bytes)))) + ParserSettings{Encoding: strs.StrPtr(encodingISO8859_1)}.WrapEncoding(bytes.NewReader(iso88591bytes)))) // 'parser_settings.encoding' = windows-1252 windows1252bytes, err := 
charmap.Windows1252.NewEncoder().Bytes([]byte("test")) assert.NoError(t, err) assert.Equal(t, "test", readAll( - ParserSettings{Encoding: testlib.StrPtr(encodingWindows1252)}.WrapEncoding(bytes.NewReader(windows1252bytes)))) + ParserSettings{Encoding: strs.StrPtr(encodingWindows1252)}.WrapEncoding(bytes.NewReader(windows1252bytes)))) } diff --git a/omniparser/schemaplugin/omni/v2/transform/decl_test.go b/omniparser/schemaplugin/omni/v2/transform/decl_test.go index 03e3153..bd94db9 100644 --- a/omniparser/schemaplugin/omni/v2/transform/decl_test.go +++ b/omniparser/schemaplugin/omni/v2/transform/decl_test.go @@ -10,7 +10,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/jf-tech/omniparser/jsons" - "github.com/jf-tech/omniparser/testlib" + "github.com/jf-tech/omniparser/strs" ) func TestMarshalDecl(t *testing.T) { @@ -110,7 +110,7 @@ func TestIsPrimitiveKind(t *testing.T) { } func TestIsXPathSet(t *testing.T) { - assert.True(t, (&Decl{XPath: testlib.StrPtr("A/B/C")}).isXPathSet()) + assert.True(t, (&Decl{XPath: strs.StrPtr("A/B/C")}).isXPathSet()) assert.True(t, (&Decl{XPathDynamic: &Decl{}}).isXPathSet()) assert.False(t, (&Decl{}).isXPathSet()) } diff --git a/omniparser/schemaplugin/omni/v2/transform/invokeCustomFunc.go b/omniparser/schemaplugin/omni/v2/transform/invokeCustomFunc.go new file mode 100644 index 0000000..a67db7c --- /dev/null +++ b/omniparser/schemaplugin/omni/v2/transform/invokeCustomFunc.go @@ -0,0 +1,121 @@ +package transform + +import ( + "fmt" + "reflect" + + node "github.com/antchfx/xmlquery" + + "github.com/jf-tech/omniparser/omniparser/customfuncs" + "github.com/jf-tech/omniparser/omniparser/nodes" +) + +func (p *parseCtx) invokeCustomFunc(n *node.Node, customFuncDecl *CustomFuncDecl) (string, error) { + // In validation, we've validated the custom func exists. 
+ fn, _ := p.customFuncs[customFuncDecl.Name] + argValues, err := p.prepCustomFuncArgValues(n, customFuncDecl, fn) + if err != nil { + return "", err + } + + result := reflect.ValueOf(fn).Call(argValues) + + // result[0] - result from custom function + // result[1] - error from custom function + if result[1].Interface() == nil { + return result[0].String(), nil + } + err = result[1].Interface().(error) + + if customFuncDecl.IgnoreErrorAndReturnEmptyStr { + return "", nil + } + + return "", fmt.Errorf("'%s' failed: %s", customFuncDecl.fqdn, err.Error()) +} + +func (p *parseCtx) prepCustomFuncArgValues( + n *node.Node, customFuncDecl *CustomFuncDecl, fn customfuncs.CustomFuncType) ([]reflect.Value, error) { + + argValues := []reflect.Value{reflect.ValueOf(p.opCtx)} + appendArgValue := func(argDecl *Decl, value interface{}) { + v, _ := normalizeAndReturnValue(argDecl, value) + // if v is nil for some reason, e.g: + // ----------- + // "date": { "custom_func": { + // "name": "dateTimeToRfc3339", + // "args": [ + // { "xpath": "DATE" }, + // { "const": "", "_comment": "input timezone" }, + // { "const": "", "_comment": "output timezone" } + // ] + // }}, + // ----------- + // In the example above, the 2nd and 3rd args (args[1] and args[2]) are empty strings, which will be converted + // to nil by normalizeAndReturnValue because keep_empty_or_null isn't specified. Since that is the typical case + // for schema authors, we always want to use empty string in case of nil value for custom func args. + switch v { + case nil: + v = "" + default: + v = v.(string) + } + argValues = append(argValues, reflect.ValueOf(v)) + } + + for _, argDecl := range customFuncDecl.Args { + // We'd love to delegate all the value calculation to parseNode but here we have + // one special case, when we deal with a field. + // We have situations we need to support aggregation func such as sum/avg. 
In those cases + // the arg to the custom func can be a field with xpath/xpath_dynamic that we want it to + // yield multiple values to feed into those agg funcs. + switch argDecl.kind { + case KindField: + xpath, dynamic, err := p.computeXPath(n, argDecl) + if err != nil { + return nil, err + } + argValueNodes, err := nodes.MatchAll(n, xpath, xpathMatchFlags(dynamic)) + if err != nil { + return nil, fmt.Errorf("xpath query '%s' for '%s' failed: %s", xpath, argDecl.fqdn, err.Error()) + } + if reflect.TypeOf(fn).IsVariadic() && len(customFuncDecl.Args) == 1 { + // Only allow this variable length nodes to args conversion when the custom func is variadic + // and this xpath arg is the **only** argument for this custom func. + for _, argValueNode := range argValueNodes { + appendArgValue(argDecl, argValueNode.InnerText()) + } + break + } + // fn is NOT variadic or this xpath arg isn't the only argument for the custom func + if len(argValueNodes) == 0 { + // A bit ugly. If the custom func is not variadic or xpath isn't the only arg, and + // xpath query returned nothing, then use "" (empty string) as the arg value. This is in line + // with previous logic to reduce regression risk. + appendArgValue(argDecl, "") + break + } + // fn is NOT variadic (or the xpath arg isn't the only arg) and xpath query returned at least one value: only use the first one. 
+ appendArgValue(argDecl, argValueNodes[0].InnerText()) + case KindArray: + argValue, err := p.parseNode(n, argDecl) + if err != nil { + return nil, err + } + if argValue == nil { + break + } + for _, v := range argValue.([]interface{}) { + appendArgValue(argDecl, v) + } + default: + // Normal case not involving field (so const/external/nested custom_func) + v, err := p.parseNode(n, argDecl) + if err != nil { + return nil, err + } + appendArgValue(argDecl, v) + } + } + return argValues, nil +} diff --git a/omniparser/schemaplugin/omni/v2/transform/invokeCustomFunc_test.go b/omniparser/schemaplugin/omni/v2/transform/invokeCustomFunc_test.go new file mode 100644 index 0000000..a06f686 --- /dev/null +++ b/omniparser/schemaplugin/omni/v2/transform/invokeCustomFunc_test.go @@ -0,0 +1,251 @@ +package transform + +import ( + "testing" + + node "github.com/antchfx/xmlquery" + "github.com/stretchr/testify/assert" + + "github.com/jf-tech/omniparser/strs" +) + +func testNode() *node.Node { + // A + // B + // C + nodeA := &node.Node{Type: node.ElementNode, Data: "A"} + nodeB := &node.Node{Type: node.ElementNode, Data: "B"} + textB := &node.Node{Type: node.TextNode, Data: "b"} + nodeC := &node.Node{Type: node.ElementNode, Data: "C"} + textC := &node.Node{Type: node.TextNode, Data: "c"} + node.AddChild(nodeA, nodeB) + node.AddChild(nodeB, textB) + node.AddChild(nodeA, nodeC) + node.AddChild(nodeC, textC) + return nodeA +} + +func TestInvokeCustomFuncForExternal_Success(t *testing.T) { + result, err := testParseCtx().invokeCustomFunc( + testNode(), + &CustomFuncDecl{ + Name: "upper", + Args: []*Decl{ + { + External: strs.StrPtr("abc"), + kind: KindExternal, + }, + }, + }) + assert.NoError(t, err) + assert.Equal(t, "EFG", result) +} + +func TestInvokeCustomFunc_Success(t *testing.T) { + result, err := testParseCtx().invokeCustomFunc( + testNode(), + &CustomFuncDecl{ + Name: "concat", + Args: []*Decl{ + {Const: strs.StrPtr("["), kind: KindConst}, + // multiple values returned and 
only the first one is used. + // note the only time when multiple values are used is when an xpath arg is the only + // arg to a variadic custom func. + {XPath: strs.StrPtr("*"), kind: KindField}, + {Const: strs.StrPtr("'"), kind: KindConst}, + { + CustomFunc: &CustomFuncDecl{ + Name: "upper", + Args: []*Decl{ + // this xpath going up too far, xquery failed and empty string is return/used. + {XPath: strs.StrPtr("../../Huh"), kind: KindField}, + }, + }, + kind: KindCustomFunc, + }, + { + CustomFunc: &CustomFuncDecl{ + Name: "external", + Args: []*Decl{ + // this would cause 'external' custom func to fail, but + // IgnoreErrorAndReturnEmptyStr would come in for rescue. + {Const: strs.StrPtr("non-existing"), kind: KindConst}, + }, + IgnoreErrorAndReturnEmptyStr: true, + fqdn: "test_fqdn", + }, + kind: KindCustomFunc, + }, + {Const: strs.StrPtr("'"), kind: KindConst}, + { + CustomFunc: &CustomFuncDecl{ + Name: "concat", + Args: []*Decl{ + {XPath: strs.StrPtr("*"), kind: KindField}, // multiple values returned and used. + }, + }, + kind: KindCustomFunc, + }, + }, + }) + assert.NoError(t, err) + assert.Equal(t, "[b''bc", result) +} + +func TestInvokeCustomFunc_MultipleValueFromXPathUsed(t *testing.T) { + result, err := testParseCtx().invokeCustomFunc( + testNode(), + &CustomFuncDecl{ + Name: "concat", + Args: []*Decl{ + // multiple values returned and all values are used. + // note the only time when multiple values are used is when an xpath arg is the only + // arg to a variadic custom func. 
+ {XPath: strs.StrPtr("*"), kind: KindField}, + }, + }) + assert.NoError(t, err) + assert.Equal(t, "bc", result) +} + +func TestInvokeCustomFunc_ArrayArgSuccess(t *testing.T) { + decl := &CustomFuncDecl{ + Name: "concat", + Args: []*Decl{ + // This array arg will return 'B' and 'C' + {kind: KindArray, Array: []*Decl{ + {kind: KindCustomFunc, XPath: strs.StrPtr("*"), CustomFunc: &CustomFuncDecl{ + Name: "upper", + Args: []*Decl{ + {XPath: strs.StrPtr("."), kind: KindField}, + }, + }}, + }}, + // This array arg will nothing + {kind: KindArray, Array: []*Decl{ + {kind: KindCustomFunc, XPath: strs.StrPtr("non-existing"), CustomFunc: &CustomFuncDecl{ + Name: "upper", + Args: []*Decl{ + {XPath: strs.StrPtr("."), kind: KindField}, + }, + }}, + }}, + }, + } + decl.Args[0].children = append(decl.Args[0].children, decl.Args[0].Array[0]) + decl.Args[0].children[0].parent = decl.Args[0] + result, err := testParseCtx().invokeCustomFunc(testNode(), decl) + assert.NoError(t, err) + assert.Equal(t, "BC", result) +} + +func TestInvokeCustomFunc_ArrayArgFailure_InvalidXPath(t *testing.T) { + decl := &CustomFuncDecl{ + Name: "concat", + Args: []*Decl{ + {kind: KindArray, Array: []*Decl{ + {kind: KindCustomFunc, XPath: strs.StrPtr("<"), CustomFunc: &CustomFuncDecl{ + Name: "upper", + Args: []*Decl{ + {XPath: strs.StrPtr("."), kind: KindField}, + }, + }}, + }}, + }, + } + decl.Args[0].children = append(decl.Args[0].children, decl.Args[0].Array[0]) + decl.Args[0].children[0].parent = decl.Args[0] + _, err := testParseCtx().invokeCustomFunc(testNode(), decl) + assert.Error(t, err) + assert.Equal(t, + "xpath query '<' on '' failed: xpath '<' compilation failed: expression must evaluate to a node-set", + err.Error()) +} + +func TestParseCtx_InvokeCustomFuncFailure(t *testing.T) { + for _, test := range []struct { + name string + customFuncDecl *CustomFuncDecl + expectedErr string + }{ + { + name: "external property not found", + customFuncDecl: &CustomFuncDecl{ + Name: "upper", + Args: 
[]*Decl{ + {External: strs.StrPtr("efg"), kind: KindExternal, fqdn: "test_fqdn"}, + }, + }, + expectedErr: "cannot find external property 'efg' on 'test_fqdn'", + }, + { + name: "failed custom func call", + customFuncDecl: &CustomFuncDecl{ + Name: "external", + Args: []*Decl{ + {Const: strs.StrPtr("non-existing"), kind: KindConst}, + }, + fqdn: "test_fqdn", + }, + expectedErr: "'test_fqdn' failed: cannot find external property 'non-existing'", + }, + { + name: "compute xpath failure", + customFuncDecl: &CustomFuncDecl{ + Name: "concat", + Args: []*Decl{ + { + XPathDynamic: &Decl{ + External: strs.StrPtr("non-existing"), + kind: KindExternal, + fqdn: "test_fqdn", + }, + kind: KindField, + }, + }, + }, + expectedErr: "cannot find external property 'non-existing' on 'test_fqdn'", + }, + { + name: "failed to match node", + customFuncDecl: &CustomFuncDecl{ + Name: "concat", + Args: []*Decl{ + {Const: strs.StrPtr("abc"), kind: KindConst}, + // xpath is syntactically invalid. + {XPath: strs.StrPtr("<"), kind: KindField, fqdn: "test_fqdn"}, + {Const: strs.StrPtr("abc"), kind: KindConst}, + }, + }, + expectedErr: "xpath query '<' for 'test_fqdn' failed: xpath '<' compilation failed: expression must evaluate to a node-set", + }, + { + name: "nested custom func failure", + customFuncDecl: &CustomFuncDecl{ + Name: "concat", + Args: []*Decl{ + {Const: strs.StrPtr("abc"), kind: KindConst}, + {Const: strs.StrPtr("efg"), kind: KindConst}, + { + CustomFunc: &CustomFuncDecl{ + Name: "external", + Args: []*Decl{ + {Const: strs.StrPtr("non-existing"), kind: KindConst}, // Invalid + }, + fqdn: "test_fqdn", + }, + kind: KindCustomFunc, + }, + }, + }, + expectedErr: "'test_fqdn' failed: cannot find external property 'non-existing'", + }, + } { + t.Run(test.name, func(t *testing.T) { + result, err := testParseCtx().invokeCustomFunc(testNode(), test.customFuncDecl) + assert.Error(t, err) + assert.Regexp(t, test.expectedErr, err.Error()) + assert.Equal(t, "", result) + }) + } +} diff --git 
a/omniparser/schemaplugin/omni/v2/transform/nodeToObject.go b/omniparser/schemaplugin/omni/v2/transform/nodeToObject.go new file mode 100644 index 0000000..dac1fb7 --- /dev/null +++ b/omniparser/schemaplugin/omni/v2/transform/nodeToObject.go @@ -0,0 +1,115 @@ +package transform + +import ( + node "github.com/antchfx/xmlquery" + + "github.com/jf-tech/omniparser/strs" +) + +func isText(n *node.Node) bool { + return n.Type == node.TextNode || n.Type == node.CharDataNode +} + +func isChildText(n *node.Node) bool { + // For all file formats except XML, if a node's first child is a text node, then the text node is + // guaranteed to be the only child. + // + // However, for XML, there are two cases: + // 1) an element node only contains text, e.g.: + // <xyz>blah</xyz> + // In this case, the node (xyz) first child is a text node and it is the only child node of xyz. + // 2) an element node contains sub element nodes. Due to the way XML is constructed, there might + // be a dummy text node at the beginning in this case, e.g.: + // <xyz> + // <abc>blah</abc> + // </xyz> + // The node (xyz) first child is actually a text node of "\n...." (. == space) then followed + // by element node <abc>. In this case, we cannot say this node <xyz>'s child is of text node type. + // (Note there is in fact another text node in the example above, that's a text node "\n" right + // after </abc> before closing tag </xyz>) + return n.FirstChild != nil && isText(n.FirstChild) && n.FirstChild.NextSibling == nil +} + +func isChildArray(n *node.Node) bool { + if isChildText(n) { + return false + } + // Delimited, fixed-length don't have array cases. + // + // For json, all array children are element nodes with .Data == "". + // + // For xml, it's more complicated, because there is no native array notation in xml, but it can + // be simulated: + // <xyz> + // <abc>blah1</abc> + // <abc>blah2</abc> + // </xyz> + // + // Note that due to the way nodes are constructed for XML, there are "dummy" text nodes (with + // "\n" and spaces in .Data) sprinkled in between the element nodes. 
So to deal with it, + // we'll go through all the child nodes, ignore all the text nodes, and if all element nodes + // have the same .Data, then we assume it's array. For json, array items are element nodes with + // .Data == "", so that logic works too. + // + // Only one exception in xml: + // <xyz> + // <abc>blah</abc> + // </xyz> + // Using that logic above, we'll consider this node has array as child, arguably true but + // counter common-sense. So we'll special case here: if there is only 1 element node, and its + // .Data isn't "" then it's **not** considered as array. Hope this common-sense classification + // works for most cases. + // + // EDI is very similar to xml case (except without those dummy text nodes) + elemCount := 0 + elemName := (*string)(nil) + for child := n.FirstChild; child != nil; child = child.NextSibling { + if child.Type != node.ElementNode { + continue + } + elemCount++ + if elemName == nil { + elemName = strs.StrPtr(child.Data) + } else if child.Data != *elemName { + return false + } + } + return elemCount > 1 || (elemCount == 1 && *elemName == "") +} + +func nodeToObject(n *node.Node) interface{} { + if n.FirstChild == nil { + return nil + } + + if isChildText(n) { + return n.FirstChild.Data + } + + if isChildArray(n) { + arr := []interface{}{} + for child := n.FirstChild; child != nil; child = child.NextSibling { + if child.Type != node.ElementNode { + continue + } + arr = append(arr, nodeToObject(child)) + } + return arr + } + + obj := map[string]interface{}{} + for child := n.FirstChild; child != nil; child = child.NextSibling { + if child.Type != node.ElementNode { + continue + } + // Note: overwrite is possible in xml input, e.g.: + // <xyz> + // <abc>blah1</abc> + // <abc>blah2</abc> + // <efg>blah3</efg> + // </xyz> + // we'll end up returning map[string]interface{}{ "abc": "blah2", "efg": "blah3" } + obj[child.Data] = nodeToObject(child) + } + return obj +} diff --git a/omniparser/schemaplugin/omni/v2/transform/nodeToObject_test.go 
b/omniparser/schemaplugin/omni/v2/transform/nodeToObject_test.go new file mode 100644 index 0000000..2035c55 --- /dev/null +++ b/omniparser/schemaplugin/omni/v2/transform/nodeToObject_test.go @@ -0,0 +1,151 @@ +package transform + +import ( + "strings" + "testing" + + node "github.com/antchfx/xmlquery" + "github.com/stretchr/testify/assert" +) + +// TODO: we don't have json stream parser ready yet. Leave out all the json testing. + +func xmlToTestNode(t *testing.T, xpath, xmlStr string) *node.Node { + p, err := node.CreateStreamParser(strings.NewReader(xmlStr), xpath) + assert.NoError(t, err) + n, err := p.Read() + assert.NoError(t, err) + return n +} + +func TestIsChildText(t *testing.T) { + for _, test := range []struct { + name string + xpath string + xmlStr string + isTextNode bool + }{ + { + name: "xml: child is text node", + xpath: "a", + xmlStr: `text`, + isTextNode: true, + }, + { + name: "xml: child is array", + xpath: "a", + xmlStr: ` + 1 + 2 + `, + isTextNode: false, + }, + { + name: "xml: child is object", + xpath: "a", + xmlStr: ` + 1 + 2 + `, + isTextNode: false, + }, + } { + t.Run(test.name, func(t *testing.T) { + n := xmlToTestNode(t, test.xpath, test.xmlStr) + assert.Equal(t, test.isTextNode, isChildText(n)) + }) + } +} + +func TestIsChildArray(t *testing.T) { + for _, test := range []struct { + name string + xpath string + xmlStr string + isArray bool + }{ + { + name: "xml: child is text node", + xpath: "a", + xmlStr: `text`, + isArray: false, + }, + { + name: "xml: child is array", + xpath: "a", + xmlStr: ` + 1 + 2 + `, + isArray: true, + }, + { + name: "xml: child is object with multiple elements", + xpath: "a", + xmlStr: ` + 1 + 2 + 3 + `, + isArray: false, + }, + { + name: "xml: child is object with single element", + xpath: "a", + xmlStr: ` + 1 + `, + isArray: false, + }, + } { + t.Run(test.name, func(t *testing.T) { + n := xmlToTestNode(t, test.xpath, test.xmlStr) + assert.Equal(t, test.isArray, isChildArray(n)) + }) + } +} + +func 
TestNodeToObject_NoChild(t *testing.T) { + assert.Nil(t, nodeToObject(&node.Node{Type: node.ElementNode, Data: "a"})) +} + +func TestNodeToObject_ChildIsText(t *testing.T) { + assert.Equal(t, "1", nodeToObject(xmlToTestNode(t, "a/b", "1"))) +} + +func TestNodeToObject_ChildIsArray(t *testing.T) { + // Testing xml array with single element to see if isChildArray will mistake it as array or not. + assert.Equal(t, + map[string]interface{}{ + "b": "1", + }, + nodeToObject(xmlToTestNode(t, "a", + ` + 1 + `))) + + // Testing xml array with multiple elements + assert.Equal(t, + []interface{}{"1", "2", "3"}, + nodeToObject(xmlToTestNode(t, "a", + ` + 1 + 2 + 3 + `))) +} + +func TestNodeToObject_ChildIsObject(t *testing.T) { + // Testing xml child object with conflict names getting overwritten. + assert.Equal(t, + map[string]interface{}{ + "b": "2", + "c": "3", + }, + nodeToObject(xmlToTestNode(t, "a", + ` + 1 + 2 + 3 + `))) +} diff --git a/omniparser/schemaplugin/omni/v2/transform/parse.go b/omniparser/schemaplugin/omni/v2/transform/parse.go new file mode 100644 index 0000000..6a98588 --- /dev/null +++ b/omniparser/schemaplugin/omni/v2/transform/parse.go @@ -0,0 +1,345 @@ +package transform + +import ( + "encoding/json" + "fmt" + "reflect" + "strconv" + "strings" + "unsafe" + + node "github.com/antchfx/xmlquery" + + "github.com/jf-tech/omniparser/omniparser/customfuncs" + "github.com/jf-tech/omniparser/omniparser/nodes" + "github.com/jf-tech/omniparser/omniparser/transformctx" + "github.com/jf-tech/omniparser/strs" +) + +type parseCtx struct { + opCtx *transformctx.Ctx + customFuncs customfuncs.CustomFuncs + disableTransformCache bool // by default we have caching on. only in some tests we turn caching off. 
+ transformCache map[string]interface{} +} + +func newParseCtx(opCtx *transformctx.Ctx, customFuncs customfuncs.CustomFuncs) *parseCtx { + return &parseCtx{ + opCtx: opCtx, + customFuncs: customFuncs, + disableTransformCache: false, + transformCache: map[string]interface{}{}, + } +} + +func nodePtrAddrStr(n *node.Node) string { + // `uintptr` is faster than `fmt.Sprintf("%p"...)` + return strconv.FormatUint(uint64(uintptr(unsafe.Pointer(n))), 16) +} + +func resultTypeConversion(decl *Decl, value string) (interface{}, error) { + if decl.resultType() == ResultTypeString { + return value, nil + } + // after this point, result type isn't of string. + + // Omit the field in final result if it is empty with non-string type. + if !strs.IsStrNonBlank(value) { + return nil, nil + } + + switch decl.resultType() { + case ResultTypeInt: + f, err := strconv.ParseFloat(value, 64) + if err != nil { + return nil, err + } + return int64(f), nil + case ResultTypeFloat: + f, err := strconv.ParseFloat(value, 64) + if err != nil { + return nil, err + } + return f, nil + case ResultTypeBoolean: + b, err := strconv.ParseBool(value) + if err != nil { + return nil, err + } + return b, nil + default: + return value, nil + } +} + +func normalizeAndSaveValue(decl *Decl, value interface{}, save func(interface{})) error { + if value == nil { + if decl.KeepEmptyOrNull { + save(nil) + } + return nil + } + // Now value != nil + switch reflect.ValueOf(value).Kind() { + case reflect.String: + strValue := value.(string) + if !decl.KeepLeadingTrailingSpace { + strValue = strings.TrimSpace(strValue) + } + // Can't use strs.IsStrNonBlank() because it does trim before comparing to empty string. 
+ if strValue == "" && !decl.KeepEmptyOrNull { + return nil + } + typedResult, err := resultTypeConversion(decl, strValue) + if err != nil { + return fmt.Errorf("fail to convert value '%s' to type '%s' on '%s', err: %s", + strValue, decl.resultType(), decl.fqdn, err.Error()) + } + if typedResult != nil || decl.KeepEmptyOrNull { + save(typedResult) + } + return nil + // Applies to KindArray + case reflect.Slice: + if len(value.([]interface{})) > 0 || decl.KeepEmptyOrNull { + save(value) + } + return nil + // Applies to KindObject + case reflect.Map: + if len(value.(map[string]interface{})) > 0 || decl.KeepEmptyOrNull { + save(value) + } + return nil + default: + save(value) + return nil + } +} + +func normalizeAndReturnValue(decl *Decl, value interface{}) (interface{}, error) { + var returnValue interface{} + err := normalizeAndSaveValue(decl, value, func(normalizedValue interface{}) { + returnValue = normalizedValue + }) + if err != nil { + return nil, err + } + return returnValue, nil +} + +func (p *parseCtx) parseNode(n *node.Node, decl *Decl) (interface{}, error) { + var cacheKey string + if !p.disableTransformCache { + cacheKey = nodePtrAddrStr(n) + "/" + decl.hash + if cacheValue, found := p.transformCache[cacheKey]; found { + return cacheValue, nil + } + } + + saveIntoCache := func(value interface{}, err error) (interface{}, error) { + if !p.disableTransformCache { + if err != nil { + return value, err + } + p.transformCache[cacheKey] = value + } + return value, err + } + + switch decl.kind { + case KindConst: + return saveIntoCache(p.parseConst(decl)) + case KindExternal: + return saveIntoCache(p.parseExternal(decl)) + case KindField: + return saveIntoCache(p.parseField(n, decl)) + case KindObject: + return saveIntoCache(p.parseObject(n, decl)) + case KindArray: + return saveIntoCache(p.parseArray(n, decl)) + case KindCustomFunc: + return saveIntoCache(p.parseCustomFunc(n, decl)) + // Actually validation phase ensures no further situations as we already 
replaced `template` kind. + default: + return nil, fmt.Errorf("unexpected decl kind '%s' on '%s'", decl.kind, decl.fqdn) + } +} + +func (p *parseCtx) parseConst(decl *Decl) (interface{}, error) { + return normalizeAndReturnValue(decl, *decl.Const) +} + +func (p *parseCtx) parseExternal(decl *Decl) (interface{}, error) { + if v, found := p.opCtx.ExternalProperty(*decl.External); found { + return normalizeAndReturnValue(decl, v) + } + return "", fmt.Errorf("cannot find external property '%s' on '%s'", *decl.External, decl.fqdn) +} + +func xpathQueryNeeded(decl *Decl) bool { + // For a given transform, we only do xpath query, if + // - it has "xpath" or "xpath_dynamic" defined in its decl AND + // - it is not a child of array decl. + // The second condition is because for array's child transform, the xpath query is done at array level. + // See details in parseArray(). + // Now, if the transform is FINAL_OUTPUT, we never do xpath query on that, FINAL_OUTPUT's content node + // is always supplied by reader. + return decl.fqdn != finalOutput && + decl.isXPathSet() && + (decl.parent == nil || decl.parent.kind != KindArray) +} + +func (p *parseCtx) computeXPath(n *node.Node, decl *Decl) (xpath string, dynamic bool, err error) { + switch { + case strs.IsStrPtrNonBlank(decl.XPath): + xpath, dynamic, err = *(decl.XPath), false, nil + case decl.XPathDynamic != nil: + dynamic = true + xpath, err = p.computeXPathDynamic(n, decl.XPathDynamic) + default: + xpath, dynamic, err = ".", false, nil + } + return xpath, dynamic, err +} + +func (p *parseCtx) computeXPathDynamic(n *node.Node, xpathDynamicDecl *Decl) (string, error) { + v, err := p.parseNode(n, xpathDynamicDecl) + if err != nil { + return "", err + } + // if v is straight out nil, then we should fail out + // if v isn't nil, it could be an interface{} type whose value is nil; or it could be some valid values. + // note we need to guard the IsNil call as it would panic if v kind isn't interface/chan/func/map/slice/ptr. 
+ // note we only need to ensure for kind == interface, because parseNode will never return + // chan/func/ptr. It's possible to return map/slice, but in earlier validation (validateXPath) we already + // ensured `xpath_dynamic` result type is string. + if v == nil || (reflect.ValueOf(v).Kind() == reflect.Interface && reflect.ValueOf(v).IsNil()) { + return "", fmt.Errorf("'%s' failed to yield a single value: no node matched", xpathDynamicDecl.fqdn) + } + return v.(string), nil +} + +func xpathMatchFlags(dynamic bool) uint { + if dynamic { + return nodes.DisableXPathCache + } + return 0 +} + +func (p *parseCtx) querySingleNodeFromXPath(n *node.Node, decl *Decl) (*node.Node, error) { + if !xpathQueryNeeded(decl) { + return n, nil + } + xpath, dynamic, err := p.computeXPath(n, decl) + if err != nil { + return nil, nil + } + resultNode, err := nodes.MatchSingle(n, xpath, xpathMatchFlags(dynamic)) + switch { + case err == nodes.ErrNoMatch: + return nil, nil + case err == nodes.ErrMoreThanExpected: + return nil, fmt.Errorf("xpath query '%s' on '%s' yielded more than one result", xpath, decl.fqdn) + case err != nil: + return nil, fmt.Errorf("xpath query '%s' on '%s' failed: %s", xpath, decl.fqdn, err.Error()) + } + return resultNode, nil +} + +func (p *parseCtx) parseField(n *node.Node, decl *Decl) (interface{}, error) { + n, err := p.querySingleNodeFromXPath(n, decl) + if err != nil { + return nil, err + } + if n == nil { + return normalizeAndReturnValue(decl, nil) + } + if decl.resultType() == ResultTypeObject && n.Type == node.ElementNode { + // When a field's result_type is marked "object", we'll simply copy the selected + // node and all its children over directly. Note it doesn't/won't work pretty with + // XML input files, as XML might contains attributes, which can't really nicely + // translate into map[string]interface{}. All other file format types + // (csv/edi/fixed-length/json) are fine. Well, so be it the limitation. 
+ return normalizeAndReturnValue(decl, nodeToObject(n)) + } + return normalizeAndReturnValue(decl, n.InnerText()) +} + +func (p *parseCtx) parseCustomFunc(n *node.Node, decl *Decl) (interface{}, error) { + n, err := p.querySingleNodeFromXPath(n, decl) + if err != nil { + return nil, err + } + if n == nil { + return normalizeAndReturnValue(decl, nil) + } + funcValue, err := p.invokeCustomFunc(n, decl.CustomFunc) + if err != nil { + return nil, err + } + if decl.resultType() == ResultTypeObject && funcValue != "" { + var obj interface{} + if err := json.Unmarshal([]byte(funcValue), &obj); err != nil { + return nil, err + } + return normalizeAndReturnValue(decl, obj) + } + return normalizeAndReturnValue(decl, funcValue) +} + +func (p *parseCtx) parseObject(n *node.Node, decl *Decl) (interface{}, error) { + n, err := p.querySingleNodeFromXPath(n, decl) + if err != nil { + return nil, err + } + if n == nil { + return normalizeAndReturnValue(decl, nil) + } + object := map[string]interface{}{} + for _, childDecl := range decl.children { + childValue, err := p.parseNode(n, childDecl) + if err != nil { + return nil, err + } + // value returned by p.parseNode is already normalized, thus this + // normalizeAndSaveValue won't fail. + _ = normalizeAndSaveValue(childDecl, childValue, func(normalizedValue interface{}) { + object[strs.LastNameletOfFQDN(childDecl.fqdn)] = normalizedValue + }) + } + return normalizeAndReturnValue(decl, object) +} + +func (p *parseCtx) parseArray(n *node.Node, decl *Decl) (interface{}, error) { + var array []interface{} + for _, childDecl := range decl.children { + // if a particular child Decl has xpath, then we'll multi-select nodes based on that + // xpath, transform each of the nodes based on the child Decl, and save to the array. + // if a particular child Decl has no xpath, then we'll simply use its parent n, i.e. + // the current n, and do child Decl transform and save to the array. 
+ // Note computeXPath() already does this for us: if xpath/xpath_dynamic both null, it + // returns xpath "." which gives us the current node when we use it to query the current + // node. + xpath, dynamic, err := p.computeXPath(n, childDecl) + if err != nil { + continue + } + nodes, err := nodes.MatchAll(n, xpath, xpathMatchFlags(dynamic)) + if err != nil { + return nil, fmt.Errorf("xpath query '%s' on '%s' failed: %s", xpath, childDecl.fqdn, err.Error()) + } + for _, nodeForChildDecl := range nodes { + childValue, err := p.parseNode(nodeForChildDecl, childDecl) + if err != nil { + return nil, err + } + // value returned by p.parseNode is already normalized, thus this + // normalizeAndSaveValue won't fail. + _ = normalizeAndSaveValue(childDecl, childValue, func(normalizedValue interface{}) { + array = append(array, normalizedValue) + }) + } + } + return normalizeAndReturnValue(decl, array) +} diff --git a/omniparser/schemaplugin/omni/v2/transform/parse_test.go b/omniparser/schemaplugin/omni/v2/transform/parse_test.go new file mode 100644 index 0000000..287ac15 --- /dev/null +++ b/omniparser/schemaplugin/omni/v2/transform/parse_test.go @@ -0,0 +1,966 @@ +package transform + +import ( + "regexp" + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/jf-tech/omniparser/omniparser/customfuncs" + "github.com/jf-tech/omniparser/omniparser/nodes" + "github.com/jf-tech/omniparser/omniparser/transformctx" + "github.com/jf-tech/omniparser/strs" +) + +func testParseCtx() *parseCtx { + ctx := newParseCtx( + &transformctx.Ctx{ + InputName: "test-input", + ExternalProperties: map[string]string{"abc": "efg"}, + }, + customfuncs.Merge( + customfuncs.CustomFuncs{ + "test_func": func(_ *transformctx.Ctx, args ...string) (string, error) { + return "test", nil + }, + }, + customfuncs.BuiltinCustomFuncs)) + // by default disabling transform cache in test because vast majority of + // test cases don't have their decls' hash computed. 
+ ctx.disableTransformCache = true + return ctx +} + +func TestResultTypeConversion(t *testing.T) { + for _, test := range []struct { + name string + value string + decl *Decl + expectedValue interface{} + expectedErr string + }{ + { + name: "result_type not specified", + value: "test", + decl: &Decl{}, + expectedValue: "test", + expectedErr: "", + }, + { + name: "string result_type for empty string", + value: "", + decl: &Decl{ResultType: testResultType(ResultTypeString)}, + expectedValue: "", + expectedErr: "", + }, + { + name: "non-string result_type for empty string", + value: "", + decl: &Decl{ResultType: testResultType(ResultTypeInt)}, + expectedValue: nil, + expectedErr: "", + }, + { + name: "string result_type for non-empty string", + value: "test", + decl: &Decl{ResultType: testResultType(ResultTypeString)}, + expectedValue: "test", + expectedErr: "", + }, + { + name: "int result_type for non-empty string", + value: "123", + decl: &Decl{ResultType: testResultType(ResultTypeInt)}, + expectedValue: int64(123), + expectedErr: "", + }, + { + name: "failed parsing for int result_type", + value: "abc", + decl: &Decl{ResultType: testResultType(ResultTypeInt)}, + expectedValue: nil, + expectedErr: `strconv.ParseFloat: parsing "abc": invalid syntax`, + }, + { + name: "float result_type for non-empty string", + value: "123.45", + decl: &Decl{ResultType: testResultType(ResultTypeFloat)}, + expectedValue: 123.45, + expectedErr: "", + }, + { + name: "failed parsing for float result_type", + value: "abc", + decl: &Decl{ResultType: testResultType(ResultTypeFloat)}, + expectedValue: nil, + expectedErr: `strconv.ParseFloat: parsing "abc": invalid syntax`, + }, + { + name: "boolean result_type for non-empty string", + value: "true", + decl: &Decl{ResultType: testResultType(ResultTypeBoolean)}, + expectedValue: true, + expectedErr: "", + }, + { + name: "failed parsing for boolean result_type", + value: "abc", + decl: &Decl{ResultType: testResultType(ResultTypeBoolean)}, + 
expectedValue: nil, + expectedErr: `strconv.ParseBool: parsing "abc": invalid syntax`, + }, + } { + t.Run(test.name, func(t *testing.T) { + linkParent(test.decl) + typedValue, err := resultTypeConversion(test.decl, test.value) + switch test.expectedErr { + case "": + assert.NoError(t, err) + default: + assert.Error(t, err) + assert.Regexp(t, test.expectedErr, err.Error()) + } + assert.Equal(t, test.expectedValue, typedValue) + }) + } +} + +func TestNormalizeAndSaveValue(t *testing.T) { + for _, test := range []struct { + name string + decl *Decl + value interface{} + expectedValue interface{} + expectedSaveCalled bool + expectedErr string + }{ + { + name: "nil value with KeepEmptyOrNull false", + decl: &Decl{}, + value: nil, + expectedValue: nil, + expectedSaveCalled: false, + expectedErr: "", + }, + { + name: "nil value with KeepEmptyOrNull true", + decl: &Decl{KeepEmptyOrNull: true}, + value: nil, + expectedValue: nil, + expectedSaveCalled: true, + expectedErr: "", + }, + { + name: "non string value saved", + decl: &Decl{}, + value: 123.45, + expectedValue: 123.45, + expectedSaveCalled: true, + expectedErr: "", + }, + { + name: "value is string and KeepLeadingTrailingSpace false", + decl: &Decl{}, + value: " test ", + expectedValue: "test", + expectedSaveCalled: true, + expectedErr: "", + }, + { + name: "value is empty string and KeepEmptyOrNull false", + decl: &Decl{}, + value: "", + expectedValue: nil, + expectedSaveCalled: false, + expectedErr: "", + }, + { + name: "value is string and KeepLeadingTrailingSpace true", + decl: &Decl{KeepLeadingTrailingSpace: true}, + value: " test ", + expectedValue: " test ", + expectedSaveCalled: true, + expectedErr: "", + }, + { + name: "value is string but can't convert to result type", + decl: &Decl{ + ResultType: testResultType(ResultTypeInt), + fqdn: "test_fqdn", + }, + value: "abc", + expectedValue: nil, + expectedSaveCalled: false, + expectedErr: `fail to convert value 'abc' to type 'int' on 'test_fqdn', err: 
strconv.ParseFloat: parsing "abc": invalid syntax`, + }, + { + name: "value is empty slice and KeepEmptyOrNull false", + decl: &Decl{}, + value: []interface{}{}, + expectedValue: nil, + expectedSaveCalled: false, + expectedErr: "", + }, + { + name: "value is empty slice and KeepEmptyOrNull true", + decl: &Decl{KeepEmptyOrNull: true}, + value: []interface{}{}, + expectedValue: []interface{}{}, + expectedSaveCalled: true, + expectedErr: "", + }, + { + name: "value is non-empty slice and KeepEmptyOrNull false", + decl: &Decl{}, + value: []interface{}{"string1"}, + expectedValue: []interface{}{"string1"}, + expectedSaveCalled: true, + expectedErr: "", + }, + { + name: "value is empty map and KeepEmptyOrNull false", + decl: &Decl{}, + value: map[string]interface{}{}, + expectedValue: nil, + expectedSaveCalled: false, + expectedErr: "", + }, + { + name: "value is empty map and KeepEmptyOrNull true", + decl: &Decl{KeepEmptyOrNull: true}, + value: map[string]interface{}{}, + expectedValue: map[string]interface{}{}, + expectedSaveCalled: true, + expectedErr: "", + }, + { + name: "value is non-empty map and KeepEmptyOrNull false", + decl: &Decl{}, + value: map[string]interface{}{"test_key": "test_value"}, + expectedValue: map[string]interface{}{"test_key": "test_value"}, + expectedSaveCalled: true, + expectedErr: "", + }, + } { + t.Run(test.name, func(t *testing.T) { + linkParent(test.decl) + saveCalled := false + err := normalizeAndSaveValue(test.decl, test.value, func(normalizedValue interface{}) { + saveCalled = true + assert.Equal(t, test.expectedValue, normalizedValue) + }) + assert.Equal(t, test.expectedSaveCalled, saveCalled) + switch test.expectedErr { + case "": + assert.NoError(t, err) + default: + assert.Error(t, err) + assert.Regexp(t, test.expectedErr, err.Error()) + } + }) + } +} + +func TestComputeXPath(t *testing.T) { + for _, test := range []struct { + name string + decl *Decl + expectedErr string + expectedXPath string + }{ + { + name: "xpath specified", + 
decl: &Decl{XPath: strs.StrPtr("A/B")}, + expectedErr: "", + expectedXPath: "A/B", + }, + { + name: "xpath_dynamic - const", + decl: &Decl{XPathDynamic: &Decl{Const: strs.StrPtr("A/C"), kind: KindConst}}, + expectedErr: "", + expectedXPath: "A/C", + }, + { + name: "xpath_dynamic - invalid xpath", + decl: &Decl{XPathDynamic: &Decl{XPath: strs.StrPtr("<"), kind: KindField, fqdn: "fqdn"}}, + expectedErr: "xpath query '<' on 'fqdn' failed: xpath '<' compilation failed: expression must evaluate to a node-set", + expectedXPath: "", + }, + { + name: "xpath_dynamic - no match", + decl: &Decl{ + XPathDynamic: &Decl{ + XPath: strs.StrPtr("A/non-existing"), + kind: KindField, + fqdn: "test_fqdn", + }}, + expectedErr: "'test_fqdn' failed to yield a single value: no node matched", + expectedXPath: "", + }, + { + name: "xpath_dynamic - xpath - success", + decl: &Decl{XPathDynamic: &Decl{XPath: strs.StrPtr("C"), kind: KindField}}, + expectedErr: "", + expectedXPath: "c", + }, + { + name: "xpath_dynamic - custom_func - err", + decl: &Decl{XPathDynamic: &Decl{ + CustomFunc: &CustomFuncDecl{ + Name: "substring", + Args: []*Decl{ + {Const: strs.StrPtr(""), kind: KindConst}, + {Const: strs.StrPtr("123"), kind: KindConst}, // will cause an out of bound error + {Const: strs.StrPtr("321"), kind: KindConst}, + }, + fqdn: "test_fqdn", + }, + kind: KindCustomFunc, + }}, + expectedErr: `'test_fqdn' failed: start index 123 is out of bounds (string length is 0)`, + expectedXPath: "", + }, + { + name: "xpath_dynamic - custom_func - success", + decl: &Decl{XPathDynamic: &Decl{ + CustomFunc: &CustomFuncDecl{ + Name: "concat", + Args: []*Decl{ + {Const: strs.StrPtr("."), kind: KindConst}, + {Const: strs.StrPtr("/"), kind: KindConst}, + {Const: strs.StrPtr("B"), kind: KindConst}, + }, + }, + kind: KindCustomFunc, + }}, + expectedErr: "", + expectedXPath: "./B", + }, + { + name: "xpath / xpath_dynamic both not specified, default to '.'", + decl: &Decl{}, + expectedErr: "", + expectedXPath: ".", + }, 
+ } { + t.Run(test.name, func(t *testing.T) { + xpath, dynamic, err := testParseCtx().computeXPath(testNode(), test.decl) + switch { + case strs.IsStrNonBlank(test.expectedErr): + assert.Error(t, err) + assert.Equal(t, test.expectedErr, err.Error()) + assert.Equal(t, "", xpath) + default: + assert.NoError(t, err) + assert.Equal(t, test.expectedXPath, xpath) + assert.Equal(t, test.decl.XPathDynamic != nil, dynamic) + } + }) + } +} + +func TestXPathMatchFlags(t *testing.T) { + dynamic := true + assert.Equal(t, nodes.DisableXPathCache, xpathMatchFlags(dynamic)) + dynamic = false + assert.Equal(t, uint(0), xpathMatchFlags(dynamic)) +} + +func TestParseCtx_ParseNode(t *testing.T) { + for _, test := range []struct { + name string + decl *Decl + expectedValue interface{} + expectedErr string + }{ + { + name: "unsupported kind", + decl: &Decl{kind: "unsupported", fqdn: "test_fqdn"}, + expectedValue: nil, + expectedErr: "unexpected decl kind 'unsupported' on 'test_fqdn'", + }, + { + name: "const kind", + decl: &Decl{Const: strs.StrPtr("test_const"), kind: KindConst}, + expectedValue: "test_const", + expectedErr: "", + }, + { + name: "External kind", + decl: &Decl{External: strs.StrPtr("abc"), kind: KindExternal}, + expectedValue: "efg", + expectedErr: "", + }, + { + name: "field kind", + decl: &Decl{XPath: strs.StrPtr("B"), kind: KindField}, + expectedValue: "b", + expectedErr: "", + }, + { + name: "field xpath query failure", + decl: &Decl{XPath: strs.StrPtr("<"), kind: KindField, fqdn: "test_fqdn"}, + expectedValue: nil, + expectedErr: "xpath query '<' on 'test_fqdn' failed: xpath '<' compilation failed: expression must evaluate to a node-set", + }, + { + name: "object kind", + decl: &Decl{ + children: []*Decl{{XPath: strs.StrPtr("C"), kind: KindField, fqdn: "test_key"}}, + kind: KindObject, + }, + expectedValue: map[string]interface{}{ + "test_key": "c", + }, + expectedErr: "", + }, + { + name: "array kind", + decl: &Decl{ + children: []*Decl{{XPath: strs.StrPtr("B"), 
kind: KindField}}, + kind: KindArray, + }, + expectedValue: []interface{}{"b"}, + expectedErr: "", + }, + { + name: "custom_func kind", + decl: &Decl{ + CustomFunc: &CustomFuncDecl{ + Name: "concat", + Args: []*Decl{ + {Const: strs.StrPtr("abc"), kind: KindConst, hash: "hash-const"}, + {XPath: strs.StrPtr("B"), kind: KindField, hash: "hash-field"}, + { + CustomFunc: &CustomFuncDecl{ + Name: "lower", + Args: []*Decl{ + {Const: strs.StrPtr("A"), kind: KindConst, hash: "hash-const2"}, + }, + }, + kind: KindCustomFunc, + }, + {Const: strs.StrPtr("A"), kind: KindConst, hash: "hash-const2"}, + }, + }, + kind: KindCustomFunc, + }, + expectedValue: "abcbaA", + expectedErr: "", + }, + } { + t.Run(test.name, func(t *testing.T) { + linkParent(test.decl) + ctx := testParseCtx() + ctx.disableTransformCache = false + value, err := ctx.parseNode(testNode(), test.decl) + switch test.expectedErr { + case "": + assert.NoError(t, err) + default: + assert.Error(t, err) + assert.Regexp(t, test.expectedErr, err.Error()) + } + assert.Equal(t, test.expectedValue, value) + }) + } +} + +func TestParseConst(t *testing.T) { + value, err := testParseCtx().parseConst(&Decl{Const: strs.StrPtr("test_const")}) + assert.NoError(t, err) + assert.Equal(t, "test_const", value) +} + +func TestParseExternal(t *testing.T) { + for _, test := range []struct { + name string + decl *Decl + expectedValue string + expectedErr string + }{ + { + name: "externalProperties found", + decl: &Decl{External: strs.StrPtr("abc")}, + expectedValue: "efg", + expectedErr: "", + }, + { + name: "externalProperties not found", + decl: &Decl{External: strs.StrPtr("efg"), fqdn: "test_fqdn"}, + expectedValue: "", + expectedErr: "cannot find external property 'efg' on 'test_fqdn'", + }, + } { + t.Run(test.name, func(t *testing.T) { + linkParent(test.decl) + value, err := testParseCtx().parseExternal(test.decl) + switch test.expectedErr { + case "": + assert.NoError(t, err) + default: + assert.Error(t, err) + assert.Equal(t, 
test.expectedErr, err.Error()) + } + assert.Equal(t, test.expectedValue, value) + }) + } +} + +func resultTypePtr(t ResultType) *ResultType { + return &t +} +func TestParseCtx_ParseField(t *testing.T) { + for _, test := range []struct { + name string + decl *Decl + expectedValue interface{} + expectedErr string + }{ + { + name: "no query needed", + decl: &Decl{}, // by leaving both xpath/xpath_dynamic nil, xpathQueryNeeded returns false. + expectedValue: "bc", + expectedErr: "", + }, + { + name: "computeXPath failed so we default value to nil", + decl: &Decl{XPathDynamic: &Decl{External: strs.StrPtr("non-existing"), kind: KindExternal}}, + expectedValue: nil, + expectedErr: "", + }, + { + name: "matched", + decl: &Decl{XPath: strs.StrPtr("B"), kind: KindField}, + expectedValue: "b", + expectedErr: "", + }, + { + name: "no nodes matched", + decl: &Decl{XPath: strs.StrPtr("abc"), kind: KindField}, + expectedValue: nil, + expectedErr: "", + }, + { + name: "more than one node matched", + decl: &Decl{XPath: strs.StrPtr("*"), kind: KindField, fqdn: "test_fqdn"}, + expectedValue: nil, + expectedErr: "xpath query '*' on 'test_fqdn' yielded more than one result", + }, + { + name: "invalid xpath", + decl: &Decl{XPath: strs.StrPtr("<"), kind: KindField, fqdn: "test_fqdn"}, + expectedValue: nil, + expectedErr: "xpath query '<' on 'test_fqdn' failed: xpath '<' compilation failed: expression must evaluate to a node-set", + }, + { + name: "result_type == object", + decl: &Decl{ResultType: resultTypePtr(ResultTypeObject)}, + expectedValue: map[string]interface{}{"B": "b", "C": "c"}, + expectedErr: "", + }, + } { + t.Run(test.name, func(t *testing.T) { + linkParent(test.decl) + value, err := testParseCtx().parseField(testNode(), test.decl) + switch test.expectedErr { + case "": + assert.NoError(t, err) + default: + assert.Error(t, err) + assert.Equal(t, test.expectedErr, err.Error()) + } + assert.Equal(t, test.expectedValue, value) + }) + } +} + +func TestParseCtx_ParseCustomFunc(t 
*testing.T) { + for _, test := range []struct { + name string + decl *Decl + expectedValue interface{} + expectedErr string + }{ + { + name: "successful invoking", + decl: &Decl{ + CustomFunc: &CustomFuncDecl{ + Name: "concat", + Args: []*Decl{ + {Const: strs.StrPtr("abc"), kind: KindConst}, + {XPath: strs.StrPtr("B"), kind: KindField}, + { + CustomFunc: &CustomFuncDecl{ + Name: "lower", + Args: []*Decl{ + {Const: strs.StrPtr("A"), kind: KindConst}, + }, + }, + kind: KindCustomFunc, + }, + }, + }, + kind: KindCustomFunc, + }, + expectedValue: "abcba", + expectedErr: "", + }, + { + name: "failed invoking", + decl: &Decl{ + CustomFunc: &CustomFuncDecl{ + Name: "lower", + Args: []*Decl{ + {External: strs.StrPtr("non-existing"), kind: KindExternal, fqdn: "test_fqdn"}, + }, + IgnoreErrorAndReturnEmptyStr: false, + }, + kind: KindCustomFunc, + }, + expectedValue: nil, + expectedErr: "cannot find external property 'non-existing' on 'test_fqdn'", + }, + { + name: "xpath matches no node", + decl: &Decl{ + XPath: strs.StrPtr("NO MATCH"), + CustomFunc: &CustomFuncDecl{ + Name: "lower", + Args: []*Decl{ + {External: strs.StrPtr("non-existing"), kind: KindExternal}, + }, + IgnoreErrorAndReturnEmptyStr: false, + }, + kind: KindCustomFunc, + }, + expectedValue: nil, + expectedErr: "", + }, + { + name: "xpath matches more than one node", + decl: &Decl{ + XPath: strs.StrPtr("*"), + CustomFunc: &CustomFuncDecl{ + Name: "lower", + Args: []*Decl{ + {External: strs.StrPtr("non-existing"), kind: KindExternal}, + }, + IgnoreErrorAndReturnEmptyStr: false, + }, + kind: KindCustomFunc, + fqdn: "test_fqdn", + }, + expectedValue: nil, + expectedErr: regexp.QuoteMeta(`xpath query '*' on 'test_fqdn' yielded more than one result`), + }, + { + name: "resultType is object", + decl: &Decl{ + CustomFunc: &CustomFuncDecl{ + Name: "splitIntoJsonArray", + Args: []*Decl{ + {Const: strs.StrPtr("a/b/c"), kind: KindConst}, + {Const: strs.StrPtr("/"), kind: KindConst}, + {Const: strs.StrPtr("true"), kind: 
KindConst}, + }, + }, + kind: KindCustomFunc, + ResultType: testResultType(ResultTypeObject), + }, + expectedValue: []interface{}{"a", "b", "c"}, + expectedErr: "", + }, + { + name: "resultType is object but value is nil", + decl: &Decl{ + XPath: strs.StrPtr("NO MATCH"), + CustomFunc: &CustomFuncDecl{ + Name: "lower", + Args: []*Decl{ + {External: strs.StrPtr("non-existing"), kind: KindExternal}, + }, + IgnoreErrorAndReturnEmptyStr: false, + }, + kind: KindCustomFunc, + ResultType: testResultType(ResultTypeObject), + }, + expectedValue: nil, + expectedErr: "", + }, + { + name: "successful invoking and result_type object", + decl: &Decl{ + CustomFunc: &CustomFuncDecl{ + Name: "concat", + Args: []*Decl{ + {Const: strs.StrPtr("abc"), kind: KindConst}, + {XPath: strs.StrPtr("B"), kind: KindField}, + { + CustomFunc: &CustomFuncDecl{ + Name: "lower", + Args: []*Decl{ + {Const: strs.StrPtr("A"), kind: KindConst}, + }, + }, + kind: KindCustomFunc, + }, + }, + }, + kind: KindCustomFunc, + ResultType: testResultType(ResultTypeObject), + }, + expectedValue: nil, + expectedErr: "invalid character 'a' looking for beginning of value", + }, + } { + t.Run(test.name, func(t *testing.T) { + linkParent(test.decl) + value, err := testParseCtx().parseCustomFunc(testNode(), test.decl) + switch test.expectedErr { + case "": + assert.NoError(t, err) + default: + assert.Error(t, err) + assert.Regexp(t, test.expectedErr, err.Error()) + } + assert.Equal(t, test.expectedValue, value) + }) + } +} + +func TestParseCtx_ParseObject(t *testing.T) { + for _, test := range []struct { + name string + decl *Decl + expectedValue map[string]interface{} + expectedErr string + }{ + { + name: "final output", + decl: &Decl{ + fqdn: "FINAL_OUTPUT", + kind: KindObject, + children: []*Decl{ + { + fqdn: "FINAL_OUTPUT.test_key", + kind: KindField, + XPath: strs.StrPtr("C"), + }, + }, + }, + expectedValue: map[string]interface{}{ + "test_key": "c", + }, + expectedErr: "", + }, + { + name: "computeXPath failed", + 
decl: &Decl{ + fqdn: "test_fqdn", + kind: KindObject, + // this would cause computeXPath fail + XPathDynamic: &Decl{External: strs.StrPtr("non-existing"), kind: KindExternal}, + }, + expectedValue: nil, + expectedErr: "", // no error when nothing matched + }, + { + name: "no nodes matched for xpath", + decl: &Decl{ + fqdn: "test_fqdn", + kind: KindObject, + XPath: strs.StrPtr("abc"), // unmatched xpath + }, + expectedValue: nil, + expectedErr: "", // no error when nothing matched + }, + { + name: "invalid xpath", + decl: &Decl{ + fqdn: "test_fqdn", + kind: KindObject, + XPath: strs.StrPtr("<"), // invalid xpath + }, + expectedValue: nil, + expectedErr: "xpath query '<' on 'test_fqdn' failed: xpath '<' compilation failed: expression must evaluate to a node-set", + }, + { + name: "failed parsing on child node", + decl: &Decl{ + fqdn: "test_fqdn", + kind: KindObject, + children: []*Decl{ + { + fqdn: "test_fqdn.test_key", + kind: KindField, + XPath: strs.StrPtr("<"), // invalid xpath syntax. 
+ }, + }, + }, + expectedValue: nil, + expectedErr: "xpath query '<' on 'test_fqdn.test_key' failed: xpath '<' compilation failed: expression must evaluate to a node-set", + }, + { + name: "failed normalization", + decl: &Decl{ + fqdn: "test_fqdn", + kind: KindObject, + children: []*Decl{ + { + fqdn: "test_fqdn.test_key", + kind: KindConst, + Const: strs.StrPtr("abc"), + ResultType: testResultType(ResultTypeInt), + }, + }, + }, + expectedValue: nil, + expectedErr: `fail to convert value 'abc' to type 'int' on 'test_fqdn.test_key', err: strconv.ParseFloat: parsing "abc": invalid syntax`, + }, + } { + t.Run(test.name, func(t *testing.T) { + linkParent(test.decl) + value, err := testParseCtx().parseObject(testNode(), test.decl) + switch test.expectedErr { + case "": + assert.NoError(t, err) + default: + assert.Error(t, err) + assert.Equal(t, test.expectedErr, err.Error()) + } + if test.expectedValue == nil { + assert.Nil(t, value) + } else { + assert.Equal(t, test.expectedValue, value) + } + }) + } +} + +func TestParseCtx_ParseArray(t *testing.T) { + for _, test := range []struct { + name string + decl *Decl + expectedValue []interface{} + expectedErr string + }{ + { + name: "empty array", + decl: &Decl{ + fqdn: "test_fqdn", + kind: KindArray, + children: []*Decl{}, + }, + expectedValue: nil, + expectedErr: "", + }, + { + name: "computeXPath failed", + decl: &Decl{ + fqdn: "test_fqdn", + kind: KindArray, + children: []*Decl{ + { + fqdn: "test_fqdn.test_key", + kind: KindField, + // this would cause computeXPath fail + XPathDynamic: &Decl{External: strs.StrPtr("non-existing"), kind: KindExternal}, + }, + }, + }, + expectedValue: nil, // if computeXPath fails, we'll just skip + expectedErr: "", + }, + { + name: "invalid xpath in child", + decl: &Decl{ + fqdn: "test_fqdn", + kind: KindArray, + children: []*Decl{ + { + fqdn: "test_fqdn.test_key", + kind: KindField, + XPath: strs.StrPtr("<"), // invalid xpath syntax. 
+ }, + }, + }, + expectedValue: nil, + expectedErr: "xpath query '<' on 'test_fqdn.test_key' failed: xpath '<' compilation failed: expression must evaluate to a node-set", + }, + { + name: "no nodes matched for child xpath", + decl: &Decl{ + fqdn: "test_fqdn", + kind: KindArray, + children: []*Decl{ + { + fqdn: "test_fqdn.test_key", + kind: KindField, + XPath: strs.StrPtr("abc"), + }, + }, + }, + expectedValue: nil, + expectedErr: "", // no error when nothing matched + }, + { + name: "failed parsing child", + decl: &Decl{ + fqdn: "test_fqdn", + kind: KindArray, + children: []*Decl{ + { + fqdn: "test_fqdn.test_key", + kind: KindObject, + XPath: strs.StrPtr("."), + children: []*Decl{ + { + fqdn: "test_fqdn.test_key.test_key2", + kind: KindConst, + Const: strs.StrPtr("abc"), + ResultType: testResultType(ResultTypeInt), + }, + }, + }, + }, + }, + expectedValue: nil, + expectedErr: `fail to convert value 'abc' to type 'int' on 'test_fqdn.test_key.test_key2', err: strconv.ParseFloat: parsing "abc": invalid syntax`, + }, + { + name: "failed normalization", + decl: &Decl{ + fqdn: "test_fqdn", + kind: KindArray, + children: []*Decl{ + { + fqdn: "test_fqdn.test_key", + kind: KindConst, + Const: strs.StrPtr("abc"), + ResultType: testResultType(ResultTypeInt), + }, + }, + }, + expectedValue: nil, + expectedErr: `fail to convert value 'abc' to type 'int' on 'test_fqdn.test_key', err: strconv.ParseFloat: parsing "abc": invalid syntax`, + }, + } { + t.Run(test.name, func(t *testing.T) { + linkParent(test.decl) + value, err := testParseCtx().parseArray(testNode(), test.decl) + switch test.expectedErr { + case "": + assert.NoError(t, err) + default: + assert.Error(t, err) + assert.Equal(t, test.expectedErr, err.Error()) + } + if test.expectedValue == nil { + assert.Nil(t, value) + } else { + assert.Equal(t, test.expectedValue, value) + } + }) + } +} diff --git a/omniparser/schemaplugin/omni/v2/transform/validate_test.go b/omniparser/schemaplugin/omni/v2/transform/validate_test.go 
index 0eaa557..4d91f4b 100644 --- a/omniparser/schemaplugin/omni/v2/transform/validate_test.go +++ b/omniparser/schemaplugin/omni/v2/transform/validate_test.go @@ -9,7 +9,6 @@ import ( "github.com/jf-tech/omniparser/jsons" "github.com/jf-tech/omniparser/omniparser/customfuncs" "github.com/jf-tech/omniparser/strs" - "github.com/jf-tech/omniparser/testlib" ) func TestValidateTransformDeclarations(t *testing.T) { @@ -239,12 +238,12 @@ func TestDetectKind(t *testing.T) { }{ { name: "const", - decl: &Decl{Const: testlib.StrPtr("test")}, + decl: &Decl{Const: strs.StrPtr("test")}, expectedKind: KindConst, }, { name: "external", - decl: &Decl{External: testlib.StrPtr("test")}, + decl: &Decl{External: strs.StrPtr("test")}, expectedKind: KindExternal, }, { @@ -254,32 +253,32 @@ func TestDetectKind(t *testing.T) { }, { name: "object with empty map", - decl: &Decl{XPath: testlib.StrPtr("test"), Object: map[string]*Decl{}}, + decl: &Decl{XPath: strs.StrPtr("test"), Object: map[string]*Decl{}}, expectedKind: KindObject, }, { name: "object with non-empty map", decl: &Decl{ XPathDynamic: &Decl{}, - Object: map[string]*Decl{"a": {Const: testlib.StrPtr("test")}}, + Object: map[string]*Decl{"a": {Const: strs.StrPtr("test")}}, }, expectedKind: KindObject, }, { name: "array", decl: &Decl{ - Array: []*Decl{{Const: testlib.StrPtr("test")}}, + Array: []*Decl{{Const: strs.StrPtr("test")}}, }, expectedKind: KindArray, }, { name: "template", - decl: &Decl{XPath: testlib.StrPtr("test"), Template: testlib.StrPtr("test")}, + decl: &Decl{XPath: strs.StrPtr("test"), Template: strs.StrPtr("test")}, expectedKind: KindTemplate, }, { name: "field with xpath", - decl: &Decl{XPath: testlib.StrPtr("test")}, + decl: &Decl{XPath: strs.StrPtr("test")}, expectedKind: KindField, }, { @@ -303,9 +302,9 @@ func TestDetectKind(t *testing.T) { func TestComputeDeclHash(t *testing.T) { decl1 := &Decl{ Object: map[string]*Decl{ - "field3": {Const: testlib.StrPtr("const"), kind: KindConst, fqdn: "root.field3", hash: 
"h3"}, - "field1": {External: testlib.StrPtr("external"), kind: KindExternal, fqdn: "root.field1", hash: "h1"}, - "field2": {Template: testlib.StrPtr("template"), kind: KindTemplate, fqdn: "root.field2", hash: "h2"}, + "field3": {Const: strs.StrPtr("const"), kind: KindConst, fqdn: "root.field3", hash: "h3"}, + "field1": {External: strs.StrPtr("external"), kind: KindExternal, fqdn: "root.field1", hash: "h1"}, + "field2": {Template: strs.StrPtr("template"), kind: KindTemplate, fqdn: "root.field2", hash: "h2"}, }, kind: KindObject, fqdn: "root", diff --git a/omniparser/transformctx/ctx.go b/omniparser/transformctx/ctx.go index 7f34f55..8684d56 100644 --- a/omniparser/transformctx/ctx.go +++ b/omniparser/transformctx/ctx.go @@ -14,6 +14,8 @@ type ExtensionCtx = interface{} type Ctx struct { // InputName is the name of the input stream to be processed. InputName string + // ExternalProperties contains exteranlly set string properties used in schema. + ExternalProperties map[string]string // CtxAwareErr allows context aware error formatting such as adding input (file) name // and line number as a prefix to the error string. CtxAwareErr errs.CtxAwareErr @@ -22,3 +24,13 @@ type Ctx struct { // parsing/transform. 
ExtCtx ExtensionCtx } + +func (ctx *Ctx) ExternalProperty(name string) (string, bool) { + if len(ctx.ExternalProperties) == 0 { + return "", false + } + if v, found := ctx.ExternalProperties[name]; found { + return v, true + } + return "", false +} diff --git a/omniparser/transformctx/ctx_test.go b/omniparser/transformctx/ctx_test.go new file mode 100644 index 0000000..5793b1e --- /dev/null +++ b/omniparser/transformctx/ctx_test.go @@ -0,0 +1,53 @@ +package transformctx + +import ( + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestCtx_ExternalProperty(t *testing.T) { + for _, test := range []struct { + name string + externalProperties map[string]string + propNameToLookUp string + expectedValue string + expectedFound bool + }{ + { + name: "externalProperties nil", + externalProperties: nil, + propNameToLookUp: "abc", + expectedValue: "", + expectedFound: false, + }, + { + name: "externalProperties empty", + externalProperties: map[string]string{}, + propNameToLookUp: "efg", + expectedValue: "", + expectedFound: false, + }, + { + name: "can't find prop", + externalProperties: map[string]string{"abc": "abc"}, + propNameToLookUp: "efg", + expectedValue: "", + expectedFound: false, + }, + { + name: "found", + externalProperties: map[string]string{"abc": "123"}, + propNameToLookUp: "abc", + expectedValue: "123", + expectedFound: true, + }, + } { + t.Run(test.name, func(t *testing.T) { + ctx := &Ctx{ExternalProperties: test.externalProperties} + v, found := ctx.ExternalProperty(test.propNameToLookUp) + assert.Equal(t, test.expectedValue, v) + assert.Equal(t, test.expectedFound, found) + }) + } +} diff --git a/strs/strs.go b/strs/strs.go index ec60831..b1351c9 100644 --- a/strs/strs.go +++ b/strs/strs.go @@ -5,6 +5,11 @@ import ( "unicode" ) +// StrPtr returns string pointer that points to a given string value. +func StrPtr(s string) *string { + return &s +} + // IsStrNonBlank checks if a string is blank or not. 
func IsStrNonBlank(s string) bool { return len(strings.TrimFunc(s, unicode.IsSpace)) > 0 @@ -55,6 +60,24 @@ func BuildFQDN2(delimiter string, namelets ...string) string { return strings.Join(namelets, delimiter) } +// LastNameletOfFQDN returns the last namelet of an FQDN delimited by default +// delimiter. If there is no delimiter in the FQDN, then the FQDN itself is +// returned. +func LastNameletOfFQDN(fqdn string) string { + return LastNameletOfFQDN2(FQDNDelimiter, fqdn) +} + +// LastNameletOfFQDN2 returns the last namelet of an FQDN delimited by given +// delimiter. If there is no delimiter in the FQDN, then the FQDN itself is +// returned. +func LastNameletOfFQDN2(delimiter, fqdn string) string { + index := strings.LastIndex(fqdn, delimiter) + if index < 0 { + return fqdn + } + return fqdn[index+1:] +} + // CopySlice copies a string slice. The returned slice is guaranteed to be a different // slice (thus the name Copy) so modifying the src from the caller side won't affect // the returned slice. 
diff --git a/strs/strs_test.go b/strs/strs_test.go index e2981ec..81aee27 100644 --- a/strs/strs_test.go +++ b/strs/strs_test.go @@ -6,10 +6,14 @@ import ( "testing" "github.com/stretchr/testify/assert" - - "github.com/jf-tech/omniparser/testlib" ) +func TestStrPtr(t *testing.T) { + sp := StrPtr("pi") + assert.NotNil(t, sp) + assert.Equal(t, "pi", *sp) +} + func TestIsStrNonBlank(t *testing.T) { for _, test := range []struct { name string @@ -53,13 +57,13 @@ func TestFirstNonBlank(t *testing.T) { } func TestStrPtrOrElse(t *testing.T) { - assert.Equal(t, "this", StrPtrOrElse(testlib.StrPtr("this"), "that")) + assert.Equal(t, "this", StrPtrOrElse(StrPtr("this"), "that")) assert.Equal(t, "that", StrPtrOrElse(nil, "that")) } func TestCopyStrPtr(t *testing.T) { assert.True(t, CopyStrPtr(nil) == nil) - src := testlib.StrPtr("abc") + src := StrPtr("abc") dst := CopyStrPtr(src) assert.Equal(t, *src, *dst) assert.True(t, fmt.Sprintf("%p", src) != fmt.Sprintf("%p", dst)) @@ -98,6 +102,44 @@ func TestBuildFQDN(t *testing.T) { } } +func TestLastNameletOfFQDN(t *testing.T) { + for _, test := range []struct { + name string + fqdn string + expected string + }{ + { + name: "empty", + fqdn: "", + expected: "", + }, + { + name: "no delimiter", + fqdn: "abc", + expected: "abc", + }, + { + name: "delimiter at beginning", + fqdn: ".abc", + expected: "abc", + }, + { + name: "delimiter at the end", + fqdn: "abc.", + expected: "", + }, + { + name: "fqdn", + fqdn: "abc.def.ghi", + expected: "ghi", + }, + } { + t.Run(test.name, func(t *testing.T) { + assert.Equal(t, test.expected, LastNameletOfFQDN(test.fqdn)) + }) + } +} + func TestCopySlice(t *testing.T) { for _, test := range []struct { name string diff --git a/testlib/testlib.go b/testlib/testlib.go index 4964b94..df90511 100644 --- a/testlib/testlib.go +++ b/testlib/testlib.go @@ -5,9 +5,3 @@ package testlib func IntPtr(n int) *int { return &n } - -// StrPtr returns a string pointer with a given value. 
-// Tests cases needed inline string pointer declaration can use this. -func StrPtr(s string) *string { - return &s -} diff --git a/testlib/testlib_test.go b/testlib/testlib_test.go index 7481d56..7564e68 100644 --- a/testlib/testlib_test.go +++ b/testlib/testlib_test.go @@ -11,9 +11,3 @@ func TestIntPtr(t *testing.T) { assert.NotNil(t, np) assert.Equal(t, 31415926, *np) } - -func TestStrPtr(t *testing.T) { - sp := StrPtr("pi") - assert.NotNil(t, sp) - assert.Equal(t, "pi", *sp) -}