From e3539feb706a513df03d3a18ae44803badaeb450 Mon Sep 17 00:00:00 2001 From: jf-tech Date: Tue, 1 Sep 2020 13:15:09 +1200 Subject: [PATCH 1/2] Finish implementation of Parser and move package `nodes` to under `omniparser` --- go.mod | 1 + go.sum | 4 ++ .../nodes}/.snapshots/TestReferenceTestTree | 0 {nodes => omniparser/nodes}/marshal_test.go | 0 {nodes => omniparser/nodes}/node.go | 0 {nodes => omniparser/nodes}/node_test.go | 0 {nodes => omniparser/nodes}/query.go | 0 {nodes => omniparser/nodes}/query_test.go | 0 omniparser/parser.go | 30 ++++++++++- omniparser/parser_test.go | 52 +++++++++++++++++-- omniparser/schemaplugin/header.go | 31 ++++++----- omniparser/schemaplugin/header_test.go | 42 ++++++++++----- omniparser/transformctx/ctx.go | 18 +++++++ 13 files changed, 143 insertions(+), 35 deletions(-) rename {nodes => omniparser/nodes}/.snapshots/TestReferenceTestTree (100%) rename {nodes => omniparser/nodes}/marshal_test.go (100%) rename {nodes => omniparser/nodes}/node.go (100%) rename {nodes => omniparser/nodes}/node_test.go (100%) rename {nodes => omniparser/nodes}/query.go (100%) rename {nodes => omniparser/nodes}/query_test.go (100%) diff --git a/go.mod b/go.mod index a44a1cc..00acfae 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/antchfx/xpath v1.1.10 github.com/bradleyjkemp/cupaloy v2.3.0+incompatible github.com/hashicorp/golang-lru v0.5.4 + github.com/jf-tech/iohelper v1.0.3 github.com/stretchr/testify v1.6.1 golang.org/x/text v0.3.0 ) diff --git a/go.sum b/go.sum index 38c1efb..9c3cf29 100644 --- a/go.sum +++ b/go.sum @@ -10,9 +10,12 @@ github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18h github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc= github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4= 
+github.com/jf-tech/iohelper v1.0.3 h1:304dQL6ZKJEmDkbIivqCGrhmJTi7k7+1e2CC+WiERS4= +github.com/jf-tech/iohelper v1.0.3/go.mod h1:X28R+KF0lnKEhZ8Q0iBzLI9FKHJy/jXZ+axaM7HMOXA= github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0= github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= @@ -27,5 +30,6 @@ golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/nodes/.snapshots/TestReferenceTestTree b/omniparser/nodes/.snapshots/TestReferenceTestTree similarity index 100% rename from nodes/.snapshots/TestReferenceTestTree rename to omniparser/nodes/.snapshots/TestReferenceTestTree diff --git a/nodes/marshal_test.go b/omniparser/nodes/marshal_test.go similarity index 100% rename from nodes/marshal_test.go rename to omniparser/nodes/marshal_test.go diff --git a/nodes/node.go b/omniparser/nodes/node.go similarity index 100% rename from nodes/node.go rename to 
omniparser/nodes/node.go diff --git a/nodes/node_test.go b/omniparser/nodes/node_test.go similarity index 100% rename from nodes/node_test.go rename to omniparser/nodes/node_test.go diff --git a/nodes/query.go b/omniparser/nodes/query.go similarity index 100% rename from nodes/query.go rename to omniparser/nodes/query.go diff --git a/nodes/query_test.go b/omniparser/nodes/query_test.go similarity index 100% rename from nodes/query_test.go rename to omniparser/nodes/query_test.go diff --git a/omniparser/parser.go b/omniparser/parser.go index b9c09ed..72e46d1 100644 --- a/omniparser/parser.go +++ b/omniparser/parser.go @@ -6,6 +6,8 @@ import ( "io" "io/ioutil" + "github.com/jf-tech/iohelper" + "github.com/jf-tech/omniparser/omniparser/customfuncs" "github.com/jf-tech/omniparser/omniparser/errs" "github.com/jf-tech/omniparser/omniparser/schemaplugin" @@ -31,10 +33,13 @@ type Parser interface { // Extension allows client of omniparser to supply its own custom funcs and/or schema plugin. type Extension struct { + // CustomFuncs contains a collection of custom funcs provided by this extension. Optional. CustomFuncs customfuncs.CustomFuncs + // ParseSchema is a constructor function that matches and creates a schema plugin. Optional. ParseSchema schemaplugin.SchemaParserFunc } +// BuiltinExtensions contains all the built-in extensions (custom funcs, and schema plugins) var BuiltinExtensions = []Extension{ { CustomFuncs: customfuncs.BuiltinCustomFuncs, @@ -91,7 +96,30 @@ func NewParser(schemaName string, schemaReader io.Reader, exts ...Extension) (Pa // GetTransformOp creates and returns an instance of TransformOp for a given input. 
func (p *parser) GetTransformOp(name string, input io.Reader, ctx *transformctx.Ctx) (TransformOp, error) { - panic("TBD") + input = p.schemaHeader.ParserSettings.WrapEncoding(input) + + br, err := iohelper.StripBOM(input) + if err != nil { + return nil, err + } + + inputProcessor, err := p.schemaPlugin.GetInputProcessor(ctx, br) + if err != nil { + return nil, err + } + + if ctx.InputName != name { + ctx.InputName = name + } + + // If caller already specified a way to do context aware error formatting, use it; + // otherwise (vast majority cases), use the InputProcessor (which implements CtxAwareErr + // interface) created by the schema plugin. + if ctx.CtxAwareErr == nil { + ctx.CtxAwareErr = inputProcessor + } + + return &transformOp{inputProcessor: inputProcessor}, nil } // SchemaHeader returns the associated schema plugin's schema header. diff --git a/omniparser/parser_test.go b/omniparser/parser_test.go index 44a6be0..01910c1 100644 --- a/omniparser/parser_test.go +++ b/omniparser/parser_test.go @@ -10,6 +10,7 @@ import ( "github.com/jf-tech/omniparser/omniparser/errs" "github.com/jf-tech/omniparser/omniparser/schemaplugin" + "github.com/jf-tech/omniparser/omniparser/transformctx" "github.com/jf-tech/omniparser/testlib" ) @@ -91,17 +92,58 @@ func TestNewParser(t *testing.T) { } } -func TestParser(t *testing.T) { +func TestParser_GetTransformOp_StripBOMFailure(t *testing.T) { + p := &parser{ + schemaHeader: schemaplugin.Header{ + ParserSettings: schemaplugin.ParserSettings{Version: "999", FileFormatType: "exe"}, + }, + } + op, err := p.GetTransformOp("test input", testlib.NewMockReadCloser("bom read failure", nil), nil) + assert.Error(t, err) + assert.Equal(t, "bom read failure", err.Error()) + assert.Nil(t, op) +} + +type testSchemaPlugin struct { + getInputProcessorErr error +} + +func (t testSchemaPlugin) GetInputProcessor(_ *transformctx.Ctx, _ io.Reader) (schemaplugin.InputProcessor, error) { + if t.getInputProcessorErr != nil { + return nil, 
t.getInputProcessorErr + } + return &testInputProcessor{}, nil +} + +func TestParser_GetTransformOp_GetInputProcessorFailure(t *testing.T) { + p := &parser{ + schemaHeader: schemaplugin.Header{ + ParserSettings: schemaplugin.ParserSettings{Version: "999", FileFormatType: "exe"}, + }, + schemaPlugin: testSchemaPlugin{getInputProcessorErr: errors.New("test failure")}, + } + op, err := p.GetTransformOp("test input", strings.NewReader("something"), nil) + assert.Error(t, err) + assert.Equal(t, "test failure", err.Error()) + assert.Nil(t, op) +} + +func TestParser_GetTransformOp_NameAndCtxAwareErrOverwrite(t *testing.T) { header := schemaplugin.Header{ ParserSettings: schemaplugin.ParserSettings{Version: "999", FileFormatType: "exe"}, } p := &parser{ schemaHeader: header, schemaContent: []byte("test schema content"), + schemaPlugin: testSchemaPlugin{}, } - assert.Panics(t, func() { - _, _ = p.GetTransformOp("name", nil, nil) - }) + ctx := &transformctx.Ctx{} + op, err := p.GetTransformOp("test input", strings.NewReader("something"), ctx) + assert.NoError(t, err) + assert.NotNil(t, op) + assert.Equal(t, "test input", ctx.InputName) + assert.NotNil(t, ctx.CtxAwareErr) + assert.Equal(t, header, p.SchemaHeader()) - assert.Equal(t, []byte("test schema content"), p.SchemaContent()) + assert.Equal(t, "test schema content", string(p.SchemaContent())) } diff --git a/omniparser/schemaplugin/header.go b/omniparser/schemaplugin/header.go index c22b815..8d4b9be 100644 --- a/omniparser/schemaplugin/header.go +++ b/omniparser/schemaplugin/header.go @@ -20,28 +20,27 @@ type ParserSettings struct { } const ( - // EncodingUTF8 is the UTF-8 (golang's default) encoding scheme. - EncodingUTF8 = "utf-8" - // EncodingISO8859_1 is the ISO 8859-1 encoding. - EncodingISO8859_1 = "iso-8859-1" - // EncodingWindows1252 is the Windows 1252 encoding. 
- EncodingWindows1252 = "windows-1252" + encodingUTF8 = "utf-8" + encodingISO8859_1 = "iso-8859-1" + encodingWindows1252 = "windows-1252" ) type encodingMappingFunc func(reader io.Reader) io.Reader -// SupportedEncodingMappings provides mapping between input stream reader and a func that does -// encoding specific translation. -var SupportedEncodingMappings = map[string]encodingMappingFunc{ - EncodingUTF8: func(r io.Reader) io.Reader { return r }, - EncodingISO8859_1: func(r io.Reader) io.Reader { return charmap.ISO8859_1.NewDecoder().Reader(r) }, - EncodingWindows1252: func(r io.Reader) io.Reader { return charmap.Windows1252.NewDecoder().Reader(r) }, +var supportedEncodingMappings = map[string]encodingMappingFunc{ + encodingUTF8: func(r io.Reader) io.Reader { return r }, + encodingISO8859_1: func(r io.Reader) io.Reader { return charmap.ISO8859_1.NewDecoder().Reader(r) }, + encodingWindows1252: func(r io.Reader) io.Reader { return charmap.Windows1252.NewDecoder().Reader(r) }, } -// GetEncoding returns the encoding of the schema. If no encoding is specified in the schema, which -// the most comment default case, it assumes the input stream will be in UTF-8. -func (p ParserSettings) GetEncoding() string { - return strs.StrPtrOrElse(p.Encoding, EncodingUTF8) +// WrapEncoding returns an io.Reader that ensures the encoding scheme matches what's specified +// in 'parser_settings.encoding' setting. +func (p ParserSettings) WrapEncoding(input io.Reader) io.Reader { + f, found := supportedEncodingMappings[strs.StrPtrOrElse(p.Encoding, encodingUTF8)] + if !found { + f = supportedEncodingMappings[encodingUTF8] + } + return f(input) } // Header contains the common ParserSettings for all schemas. 
diff --git a/omniparser/schemaplugin/header_test.go b/omniparser/schemaplugin/header_test.go index 0e5944e..113eb4d 100644 --- a/omniparser/schemaplugin/header_test.go +++ b/omniparser/schemaplugin/header_test.go @@ -1,6 +1,8 @@ package schemaplugin import ( + "bytes" + "io" "io/ioutil" "sort" "strings" @@ -8,6 +10,7 @@ import ( "github.com/bradleyjkemp/cupaloy" "github.com/stretchr/testify/assert" + "golang.org/x/text/encoding/charmap" "github.com/jf-tech/omniparser/jsons" "github.com/jf-tech/omniparser/testlib" @@ -15,7 +18,7 @@ import ( func TestSupportedEncodingMappingsDump(t *testing.T) { var supported []string - for k := range SupportedEncodingMappings { + for k := range supportedEncodingMappings { supported = append(supported, k) } sort.Strings(supported) @@ -23,7 +26,7 @@ func TestSupportedEncodingMappingsDump(t *testing.T) { } func TestSupportedEncodingMappings(t *testing.T) { - for encoding, mappingFn := range SupportedEncodingMappings { + for encoding, mappingFn := range supportedEncodingMappings { t.Run(encoding, func(t *testing.T) { actual, err := ioutil.ReadAll(mappingFn(strings.NewReader("test"))) assert.NoError(t, err) @@ -32,15 +35,28 @@ func TestSupportedEncodingMappings(t *testing.T) { } } -func TestGetEncoding(t *testing.T) { - assert.Equal( - t, EncodingUTF8, (ParserSettings{Encoding: testlib.StrPtr(EncodingUTF8)}).GetEncoding()) - assert.Equal( - t, EncodingISO8859_1, (ParserSettings{Encoding: testlib.StrPtr(EncodingISO8859_1)}).GetEncoding()) - assert.Equal( - t, EncodingWindows1252, (ParserSettings{Encoding: testlib.StrPtr(EncodingWindows1252)}).GetEncoding()) - assert.Equal( - t, EncodingUTF8, (ParserSettings{}).GetEncoding()) - assert.Equal( - t, "whatever", (ParserSettings{Encoding: testlib.StrPtr("whatever")}).GetEncoding()) +func TestWrapEncoding(t *testing.T) { + readAll := func(r io.Reader) string { + b, err := ioutil.ReadAll(r) + assert.NoError(t, err) + return string(b) + } + // No 'parser_settings.encoding' ==> UTF-8 + 
assert.Equal(t, "test", readAll(ParserSettings{}.WrapEncoding(strings.NewReader("test")))) + // 'parser_settings.encoding' = UTF-8 + assert.Equal(t, "test", readAll( + ParserSettings{Encoding: testlib.StrPtr(encodingUTF8)}.WrapEncoding(strings.NewReader("test")))) + // 'parser_settings.encoding' = "unknown" (unsupported value) ==> UTF-8 + assert.Equal(t, "test", readAll( + ParserSettings{Encoding: testlib.StrPtr("unknown")}.WrapEncoding(strings.NewReader("test")))) + // 'parser_settings.encoding' = ISO-8859-1 + iso88591bytes, err := charmap.ISO8859_1.NewEncoder().Bytes([]byte("test")) + assert.NoError(t, err) + assert.Equal(t, "test", readAll( + ParserSettings{Encoding: testlib.StrPtr(encodingISO8859_1)}.WrapEncoding(bytes.NewReader(iso88591bytes)))) + // 'parser_settings.encoding' = windows-1252 + windows1252bytes, err := charmap.Windows1252.NewEncoder().Bytes([]byte("test")) + assert.NoError(t, err) + assert.Equal(t, "test", readAll( + ParserSettings{Encoding: testlib.StrPtr(encodingWindows1252)}.WrapEncoding(bytes.NewReader(windows1252bytes)))) } diff --git a/omniparser/transformctx/ctx.go b/omniparser/transformctx/ctx.go index 6147d96..b6cab9e 100644 --- a/omniparser/transformctx/ctx.go +++ b/omniparser/transformctx/ctx.go @@ -1,5 +1,23 @@ package transformctx +import ( + "github.com/jf-tech/omniparser/omniparser/errs" +) + +// ExtensionCtx is a context object supplied by an extension. An extension +// of omniparser can supply its own custom funcs and/or its own schema plugin. +// This ctx object allows the caller to "communicate" with its supplied extension +// custom funcs and/or schema plugin. +type ExtensionCtx = interface{} + // Ctx contains the context object used throughout the lifespan of a TransformOp action. type Ctx struct { + // InputName is the name of the input stream to be processed. + InputName string + // CtxAwareErr allows context aware error formatting such as adding input (file) name + // and line number as a prefix to the error string. 
+ CtxAwareErr errs.CtxAwareErr + // ExtCtx is extension specific context object that allows communications between + // caller and extension's custom functions and/or schema plugin. + ExtCtx ExtensionCtx } From 82fce027aefbe0c5848055b9d459fea9436fe462 Mon Sep 17 00:00:00 2001 From: jf-tech Date: Tue, 1 Sep 2020 13:17:03 +1200 Subject: [PATCH 2/2] adjust space --- omniparser/parser.go | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/omniparser/parser.go b/omniparser/parser.go index 72e46d1..adf2af4 100644 --- a/omniparser/parser.go +++ b/omniparser/parser.go @@ -96,29 +96,23 @@ func NewParser(schemaName string, schemaReader io.Reader, exts ...Extension) (Pa // GetTransformOp creates and returns an instance of TransformOp for a given input. func (p *parser) GetTransformOp(name string, input io.Reader, ctx *transformctx.Ctx) (TransformOp, error) { - input = p.schemaHeader.ParserSettings.WrapEncoding(input) - - br, err := iohelper.StripBOM(input) + br, err := iohelper.StripBOM(p.schemaHeader.ParserSettings.WrapEncoding(input)) if err != nil { return nil, err } - inputProcessor, err := p.schemaPlugin.GetInputProcessor(ctx, br) if err != nil { return nil, err } - if ctx.InputName != name { ctx.InputName = name } - // If caller already specified a way to do context aware error formatting, use it; // otherwise (vast majority cases), use the InputProcessor (which implements CtxAwareErr // interface) created by the schema plugin. if ctx.CtxAwareErr == nil { ctx.CtxAwareErr = inputProcessor } - return &transformOp{inputProcessor: inputProcessor}, nil }