Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ require (
github.com/antchfx/xpath v1.1.10
github.com/bradleyjkemp/cupaloy v2.3.0+incompatible
github.com/hashicorp/golang-lru v0.5.4
github.com/jf-tech/iohelper v1.0.3
github.com/stretchr/testify v1.6.1
golang.org/x/text v0.3.0
)
4 changes: 4 additions & 0 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -10,9 +10,12 @@ github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18h
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/hashicorp/golang-lru v0.5.4 h1:YDjusn29QI/Das2iO9M0BHnIbxPeyuCHsjMW+lJfyTc=
github.com/hashicorp/golang-lru v0.5.4/go.mod h1:iADmTwqILo4mZ8BN3D2Q6+9jd8WM5uGBxy+E8yxSoD4=
github.com/jf-tech/iohelper v1.0.3 h1:304dQL6ZKJEmDkbIivqCGrhmJTi7k7+1e2CC+WiERS4=
github.com/jf-tech/iohelper v1.0.3/go.mod h1:X28R+KF0lnKEhZ8Q0iBzLI9FKHJy/jXZ+axaM7HMOXA=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA=
github.com/stretchr/testify v1.6.1 h1:hDPOHmpOpP40lSULcqw7IrRb/u7w6RpDC9399XyoNd0=
github.com/stretchr/testify v1.6.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
Expand All @@ -27,5 +30,6 @@ golang.org/x/text v0.3.0 h1:g61tztE5qeGQ89tm6NTjjM9VPIm088od1l6aSorWRWg=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c h1:dUUwHk2QECo/6vqA44rthZ8ie2QXMNeKRTHCNY2nXvo=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
File renamed without changes.
24 changes: 23 additions & 1 deletion omniparser/parser.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ import (
"io"
"io/ioutil"

"github.com/jf-tech/iohelper"

"github.com/jf-tech/omniparser/omniparser/customfuncs"
"github.com/jf-tech/omniparser/omniparser/errs"
"github.com/jf-tech/omniparser/omniparser/schemaplugin"
Expand All @@ -31,10 +33,13 @@ type Parser interface {

// Extension allows a client of omniparser to supply its own custom funcs and/or schema plugin.
// Both fields are optional, so an extension may provide either one of them or both.
type Extension struct {
// CustomFuncs contains a collection of custom funcs provided by this extension. Optional.
CustomFuncs customfuncs.CustomFuncs
// ParseSchema is a constructor function that matches and creates a schema plugin. Optional.
ParseSchema schemaplugin.SchemaParserFunc
}

// BuiltinExtensions contains all the built-in extensions (custom funcs, and schema plugins)
var BuiltinExtensions = []Extension{
{
CustomFuncs: customfuncs.BuiltinCustomFuncs,
Expand Down Expand Up @@ -91,7 +96,24 @@ func NewParser(schemaName string, schemaReader io.Reader, exts ...Extension) (Pa

// GetTransformOp creates and returns an instance of TransformOp for a given input.
//
// The input reader is first wrapped according to the schema's 'parser_settings.encoding'
// and then passed through iohelper.StripBOM; any error from that step is returned as-is.
// The schema plugin is then asked to create an InputProcessor for the resulting reader.
//
// On success ctx is updated: ctx.InputName is set to name, and ctx.CtxAwareErr is set to
// the InputProcessor unless the caller already supplied one. ctx must therefore be non-nil
// whenever the input can be read and the schema plugin succeeds.
func (p *parser) GetTransformOp(name string, input io.Reader, ctx *transformctx.Ctx) (TransformOp, error) {
	br, err := iohelper.StripBOM(p.schemaHeader.ParserSettings.WrapEncoding(input))
	if err != nil {
		return nil, err
	}
	inputProcessor, err := p.schemaPlugin.GetInputProcessor(ctx, br)
	if err != nil {
		return nil, err
	}
	// The name passed to this call always wins over whatever the caller left in ctx.
	ctx.InputName = name
	// If caller already specified a way to do context aware error formatting, use it;
	// otherwise (vast majority cases), use the InputProcessor (which implements CtxAwareErr
	// interface) created by the schema plugin.
	if ctx.CtxAwareErr == nil {
		ctx.CtxAwareErr = inputProcessor
	}
	return &transformOp{inputProcessor: inputProcessor}, nil
}

// SchemaHeader returns the associated schema plugin's schema header.
Expand Down
52 changes: 47 additions & 5 deletions omniparser/parser_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

"github.com/jf-tech/omniparser/omniparser/errs"
"github.com/jf-tech/omniparser/omniparser/schemaplugin"
"github.com/jf-tech/omniparser/omniparser/transformctx"
"github.com/jf-tech/omniparser/testlib"
)

Expand Down Expand Up @@ -91,17 +92,58 @@ func TestNewParser(t *testing.T) {
}
}

func TestParser(t *testing.T) {
// TestParser_GetTransformOp_StripBOMFailure verifies that when the input cannot be read
// during the BOM stripping step, GetTransformOp returns that error unchanged and no
// TransformOp.
func TestParser_GetTransformOp_StripBOMFailure(t *testing.T) {
	p := &parser{
		schemaHeader: schemaplugin.Header{
			ParserSettings: schemaplugin.ParserSettings{Version: "999", FileFormatType: "exe"},
		},
	}
	// A nil ctx is acceptable here: GetTransformOp returns before it ever touches ctx.
	// NOTE(review): the mock reader is presumed to fail reads with the given message - confirm
	// against testlib.NewMockReadCloser.
	op, err := p.GetTransformOp("test input", testlib.NewMockReadCloser("bom read failure", nil), nil)
	// EqualError asserts both that err is non-nil and its message, without dereferencing a
	// nil error the way a separate err.Error() call would.
	assert.EqualError(t, err, "bom read failure")
	assert.Nil(t, op)
}

// testSchemaPlugin is a schema plugin test double whose GetInputProcessor outcome is
// controlled by getInputProcessorErr.
type testSchemaPlugin struct {
// getInputProcessorErr, when non-nil, is returned by GetInputProcessor instead of an
// input processor.
getInputProcessorErr error
}

// GetInputProcessor ignores both of its arguments. It fails with the configured
// getInputProcessorErr when one is set; otherwise it hands back a new testInputProcessor.
func (t testSchemaPlugin) GetInputProcessor(_ *transformctx.Ctx, _ io.Reader) (schemaplugin.InputProcessor, error) {
	if failure := t.getInputProcessorErr; failure != nil {
		return nil, failure
	}
	return &testInputProcessor{}, nil
}

// TestParser_GetTransformOp_GetInputProcessorFailure verifies that an error returned by the
// schema plugin's GetInputProcessor is returned unchanged by GetTransformOp, together with
// a nil TransformOp.
func TestParser_GetTransformOp_GetInputProcessorFailure(t *testing.T) {
	p := &parser{
		schemaHeader: schemaplugin.Header{
			ParserSettings: schemaplugin.ParserSettings{Version: "999", FileFormatType: "exe"},
		},
		schemaPlugin: testSchemaPlugin{getInputProcessorErr: errors.New("test failure")},
	}
	// A nil ctx is acceptable here: GetTransformOp returns before it dereferences ctx.
	op, err := p.GetTransformOp("test input", strings.NewReader("something"), nil)
	// EqualError asserts both that err is non-nil and its message, without dereferencing a
	// nil error the way a separate err.Error() call would.
	assert.EqualError(t, err, "test failure")
	assert.Nil(t, op)
}

// TestParser_GetTransformOp_NameAndCtxAwareErrOverwrite verifies the success path of
// GetTransformOp: a TransformOp is returned, ctx.InputName is overwritten with the supplied
// name, and ctx.CtxAwareErr is populated when the caller left it nil. It also checks the
// SchemaHeader and SchemaContent accessors.
func TestParser_GetTransformOp_NameAndCtxAwareErrOverwrite(t *testing.T) {
	header := schemaplugin.Header{
		ParserSettings: schemaplugin.ParserSettings{Version: "999", FileFormatType: "exe"},
	}
	p := &parser{
		schemaHeader:  header,
		schemaContent: []byte("test schema content"),
		schemaPlugin:  testSchemaPlugin{},
	}
	// Start from a zero-value ctx so both InputName and CtxAwareErr must be filled in.
	ctx := &transformctx.Ctx{}
	op, err := p.GetTransformOp("test input", strings.NewReader("something"), ctx)
	assert.NoError(t, err)
	assert.NotNil(t, op)
	assert.Equal(t, "test input", ctx.InputName)
	assert.NotNil(t, ctx.CtxAwareErr)

	assert.Equal(t, header, p.SchemaHeader())
	assert.Equal(t, "test schema content", string(p.SchemaContent()))
}
31 changes: 15 additions & 16 deletions omniparser/schemaplugin/header.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,28 +20,27 @@ type ParserSettings struct {
}

// Names of the input encodings understood by this package, as they appear in the
// 'parser_settings.encoding' schema setting.
const (
// EncodingUTF8 is the UTF-8 (golang's default) encoding scheme.
EncodingUTF8 = "utf-8"
// EncodingISO8859_1 is the ISO 8859-1 encoding.
EncodingISO8859_1 = "iso-8859-1"
// EncodingWindows1252 is the Windows 1252 encoding.
EncodingWindows1252 = "windows-1252"
// encodingUTF8, encodingISO8859_1 and encodingWindows1252 are the package-private
// encoding names. They are the keys of supportedEncodingMappings, and encodingUTF8 is
// the default WrapEncoding falls back to when no encoding is specified.
encodingUTF8 = "utf-8"
encodingISO8859_1 = "iso-8859-1"
encodingWindows1252 = "windows-1252"
)

// encodingMappingFunc takes an input stream reader and returns a reader that performs the
// encoding specific translation for that stream.
type encodingMappingFunc func(reader io.Reader) io.Reader

// SupportedEncodingMappings provides mapping between input stream reader and a func that does
// encoding specific translation.
var SupportedEncodingMappings = map[string]encodingMappingFunc{
EncodingUTF8: func(r io.Reader) io.Reader { return r },
EncodingISO8859_1: func(r io.Reader) io.Reader { return charmap.ISO8859_1.NewDecoder().Reader(r) },
EncodingWindows1252: func(r io.Reader) io.Reader { return charmap.Windows1252.NewDecoder().Reader(r) },
// supportedEncodingMappings associates each supported encoding name with the func that wraps
// an input reader for that encoding: UTF-8 input is passed through untouched, while
// ISO 8859-1 and Windows 1252 inputs are routed through the matching charmap decoder.
var supportedEncodingMappings = map[string]encodingMappingFunc{
	encodingUTF8: func(in io.Reader) io.Reader {
		return in
	},
	encodingISO8859_1: func(in io.Reader) io.Reader {
		decoder := charmap.ISO8859_1.NewDecoder()
		return decoder.Reader(in)
	},
	encodingWindows1252: func(in io.Reader) io.Reader {
		decoder := charmap.Windows1252.NewDecoder()
		return decoder.Reader(in)
	},
}

// GetEncoding returns the encoding of the schema. If no encoding is specified in the schema, which
// is the most common default case, it assumes the input stream will be in UTF-8.
func (p ParserSettings) GetEncoding() string {
return strs.StrPtrOrElse(p.Encoding, EncodingUTF8)
// WrapEncoding wraps input in an io.Reader that applies the translation registered for the
// schema's 'parser_settings.encoding' setting. When the setting is absent, or names an
// encoding with no registered mapping, the UTF-8 mapping is applied instead.
func (p ParserSettings) WrapEncoding(input io.Reader) io.Reader {
	encoding := strs.StrPtrOrElse(p.Encoding, encodingUTF8)
	if wrap, ok := supportedEncodingMappings[encoding]; ok {
		return wrap(input)
	}
	// Unknown encoding name: fall back to the UTF-8 mapping.
	return supportedEncodingMappings[encodingUTF8](input)
}

// Header contains the common ParserSettings for all schemas.
Expand Down
42 changes: 29 additions & 13 deletions omniparser/schemaplugin/header_test.go
Original file line number Diff line number Diff line change
@@ -1,29 +1,32 @@
package schemaplugin

import (
"bytes"
"io"
"io/ioutil"
"sort"
"strings"
"testing"

"github.com/bradleyjkemp/cupaloy"
"github.com/stretchr/testify/assert"
"golang.org/x/text/encoding/charmap"

"github.com/jf-tech/omniparser/jsons"
"github.com/jf-tech/omniparser/testlib"
)

func TestSupportedEncodingMappingsDump(t *testing.T) {
var supported []string
for k := range SupportedEncodingMappings {
for k := range supportedEncodingMappings {
supported = append(supported, k)
}
sort.Strings(supported)
cupaloy.SnapshotT(t, jsons.BPM(supported))
}

func TestSupportedEncodingMappings(t *testing.T) {
for encoding, mappingFn := range SupportedEncodingMappings {
for encoding, mappingFn := range supportedEncodingMappings {
t.Run(encoding, func(t *testing.T) {
actual, err := ioutil.ReadAll(mappingFn(strings.NewReader("test")))
assert.NoError(t, err)
Expand All @@ -32,15 +35,28 @@ func TestSupportedEncodingMappings(t *testing.T) {
}
}

func TestGetEncoding(t *testing.T) {
assert.Equal(
t, EncodingUTF8, (ParserSettings{Encoding: testlib.StrPtr(EncodingUTF8)}).GetEncoding())
assert.Equal(
t, EncodingISO8859_1, (ParserSettings{Encoding: testlib.StrPtr(EncodingISO8859_1)}).GetEncoding())
assert.Equal(
t, EncodingWindows1252, (ParserSettings{Encoding: testlib.StrPtr(EncodingWindows1252)}).GetEncoding())
assert.Equal(
t, EncodingUTF8, (ParserSettings{}).GetEncoding())
assert.Equal(
t, "whatever", (ParserSettings{Encoding: testlib.StrPtr("whatever")}).GetEncoding())
func TestWrapEncoding(t *testing.T) {
readAll := func(r io.Reader) string {
b, err := ioutil.ReadAll(r)
assert.NoError(t, err)
return string(b)
}
// No 'parser_settings.encoding' ==> UTF-8
assert.Equal(t, "test", readAll(ParserSettings{}.WrapEncoding(strings.NewReader("test"))))
// 'parser_settings.encoding' = UTF-8
assert.Equal(t, "test", readAll(
ParserSettings{Encoding: testlib.StrPtr(encodingUTF8)}.WrapEncoding(strings.NewReader("test"))))
// 'parser_settings.encoding' = <unknown> ==> UTF-8
assert.Equal(t, "test", readAll(
ParserSettings{Encoding: testlib.StrPtr("unknown")}.WrapEncoding(strings.NewReader("test"))))
// 'parser_settings.encoding' = ISO-8859-1
iso88591bytes, err := charmap.ISO8859_1.NewEncoder().Bytes([]byte("test"))
assert.NoError(t, err)
assert.Equal(t, "test", readAll(
ParserSettings{Encoding: testlib.StrPtr(encodingISO8859_1)}.WrapEncoding(bytes.NewReader(iso88591bytes))))
// 'parser_settings.encoding' = windows-1252
windows1252bytes, err := charmap.Windows1252.NewEncoder().Bytes([]byte("test"))
assert.NoError(t, err)
assert.Equal(t, "test", readAll(
ParserSettings{Encoding: testlib.StrPtr(encodingWindows1252)}.WrapEncoding(bytes.NewReader(windows1252bytes))))
}
18 changes: 18 additions & 0 deletions omniparser/transformctx/ctx.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,23 @@
package transformctx

import (
"github.com/jf-tech/omniparser/omniparser/errs"
)

// ExtensionCtx is a context object supplied by an extension. An extension
// of omniparser can supply its own custom funcs and/or its own schema plugin.
// This ctx object allows the caller to "communicate" with the custom funcs
// and/or schema plugin it supplied. It is an alias of the empty interface, so
// any value can be stored in it.
type ExtensionCtx = interface{}

// Ctx contains the context object used throughout the lifespan of a TransformOp action.
type Ctx struct {
// InputName is the name of the input stream to be processed.
InputName string
// CtxAwareErr allows context aware error formatting such as adding input (file) name
// and line number as a prefix to the error string. May be left nil by the caller.
CtxAwareErr errs.CtxAwareErr
// ExtCtx is extension specific context object that allows communications between
// caller and extension's custom functions and/or schema plugin.
ExtCtx ExtensionCtx
}