Skip to content

Commit

Permalink
up: update the textscan some logic and update readme
Browse files Browse the repository at this point in the history
  • Loading branch information
inhere committed Oct 15, 2022
1 parent 1b9f75c commit 75780ae
Show file tree
Hide file tree
Showing 7 changed files with 306 additions and 42 deletions.
2 changes: 1 addition & 1 deletion strutil/README.md
Expand Up @@ -8,7 +8,7 @@ This is a go string operate util package.
## Install

```bash
go get github.com/gookit/goutil/dump
go get github.com/gookit/goutil/strutil
```

## Usage
Expand Down
129 changes: 129 additions & 0 deletions strutil/textscan/README.md
@@ -0,0 +1,129 @@
# TextScan

Package `textscan` implements a text scanner for quickly parsing text contents.
It can be used to parse contents in formats such as INI or Properties.

## Install

```shell
go get github.com/gookit/goutil/strutil/textscan
```

## Examples

```go
package main

import (
"fmt"

"github.com/gookit/goutil/dump"
"github.com/gookit/goutil/strutil/textscan"
"github.com/gookit/goutil/testutil/assert"
)

func main() {
ts := textscan.TextScanner{}
ts.AddMatchers(
&textscan.CommentsMatcher{},
&textscan.KeyValueMatcher{},
)

ts.SetInput(`
# comments 1
name = inhere
// comments 2
age = 28
/*
multi line
comments 3
*/
desc = '''
a multi
line string
'''
`)

data := make(map[string]string)
err := ts.Each(func(t textscan.Token) {
fmt.Println("====> Token kind:", t.Kind())
fmt.Println(t.String())

if t.Kind() == textscan.TokValue {
v := t.(*textscan.ValueToken)
data[v.Key()] = v.Value()
}
})

dump.P(data, err)
}
```

**Output:**

```shell
====> Token kind: Comments
# comments 1
====> Token kind: Value
key: name
value: "inhere"
comments:
====> Token kind: Comments
// comments 2
====> Token kind: Value
key: age
value: "28"
comments:
====> Token kind: Comments
/*
multi line
comments 3
*/
====> Token kind: Value
key: desc
value: "\n\na multi\nline string\n"
comments:

==== Collected data:
map[string]string { #len=3
"desc": string("
a multi
line string
"), #len=22
"name": string("inhere"), #len=6
"age": string("28"), #len=2
},
```

## Functions

```go
package textscan // import "github.com/gookit/goutil/strutil/textscan"

func AddKind(k Kind, name string)
func CommentsDetect(text string) (ok, more bool, err error)
func CommentsDetectEnd(line string) bool
func IsKindToken(k Kind, tok Token) bool
func KindString(k Kind) string
type BaseToken struct{ ... }
type CommentToken struct{ ... }
func NewCommentToken(val string) *CommentToken
type CommentsMatcher struct{ ... }
type EmptyToken struct{ ... }
func NewEmptyToken() *EmptyToken
type HandleFn func(t Token)
type KeyValueMatcher struct{ ... }
type Kind rune
const TokInvalid Kind = iota ...
type LiteToken interface{ ... }
type Matcher interface{ ... }
type Parser struct{ ... }
func NewParser(fn HandleFn) *Parser
type TextScanner struct{ ... }
func NewScanner(in interface{}) *TextScanner
type Token interface{ ... }
type ValueToken struct{ ... }
```
63 changes: 51 additions & 12 deletions strutil/textscan/kvparse.go
Expand Up @@ -14,20 +14,24 @@ import (
const (
MultiLineValMarkS = "'''"
MultiLineValMarkD = `"""`
MultiLineValMarkH = "<<<" // at start
MultiLineValMarkQ = "\\" // at end
MultiLineValMarkH = "<<<" // heredoc at start. <<<TXT ... TXT
MultiLineValMarkQ = "\\" // at end. eg: properties contents
MultiLineCmtEnd = "*/"
// VarRefStartChars = "${"
)

// KeyValueMatcher matches key-value tokens.
// It supports parsing `KEY=VALUE` style text lines.
type KeyValueMatcher struct {
// Separator is the string used to split the key from the value. Default is "=".
Separator string
// MergeComments collects the previous comments token into the value token.
// If set to true, remember to skip TokComments tokens on each s.Scan().
MergeComments bool
// InlineComment enables parsing and splitting of an inline comment.
InlineComment bool
// KeyCheckFn is an optional func for checking whether a key string is valid.
// When set, Match calls it with the parsed key and returns its error, if any.
KeyCheckFn func(key string) error
}

// Match text line.
Expand All @@ -53,6 +57,13 @@ func (m *KeyValueMatcher) Match(text string, prev Token) (Token, error) {
return nil, errorx.Rawf("key cannot be empty: %q", str)
}

// check key string.
if m.KeyCheckFn != nil {
if err := m.KeyCheckFn(key); err != nil {
return nil, err
}
}

// handle value
vln := len(val)
tok := &ValueToken{
Expand Down Expand Up @@ -142,7 +153,7 @@ func (m *KeyValueMatcher) DetectEnd(mark, text string) (ok bool, val string) {
return
}

// ValueToken struct
// ValueToken contains key and value contents
type ValueToken struct {
BaseToken
m *KeyValueMatcher
Expand Down Expand Up @@ -174,7 +185,7 @@ func (t *ValueToken) Comment() string {
// Value text string.
func (t *ValueToken) Value() string {
if len(t.values) > 0 {
return strings.Join(t.values, "\n")
return strings.Join(t.values, "")
}
return t.value
}
Expand All @@ -184,6 +195,11 @@ func (t *ValueToken) HasMore() bool {
return t.more
}

// HasComment reports whether the value token has a comment attached,
// i.e. whether its comment field is non-nil.
func (t *ValueToken) HasComment() bool {
return t.comment != nil
}

// MergeSame comments token
func (t *ValueToken) MergeSame(_ Token) error {
return errors.New("merge value token not allowed")
Expand Down Expand Up @@ -215,8 +231,11 @@ func (t *ValueToken) ScanMore(ts *TextScanner) error {
}
}

// CommentsMatcher struct
// CommentsMatcher match comments lines.
// will auto merge prev comments token
type CommentsMatcher struct {
// InlineChars for match inline comments. default is: #
InlineChars []byte
// MatchFn for comments line
// - mark useful on multi line comments
MatchFn func(text string) (ok, more bool, err error)
Expand All @@ -227,7 +246,18 @@ type CommentsMatcher struct {
// Match comments token
func (m *CommentsMatcher) Match(text string, prev Token) (Token, error) {
if m.MatchFn == nil {
m.MatchFn = CommentsDetect
if len(m.InlineChars) == 0 {
m.InlineChars = []byte{'#'}
}

m.MatchFn = func(text string) (ok, more bool, err error) {
return CommentsDetect(text, m.InlineChars)
}
}

// skip empty line
if text = strings.TrimSpace(text); text == "" {
return nil, nil
}

ok, more, err := m.MatchFn(text)
Expand Down Expand Up @@ -257,19 +287,28 @@ func (m *CommentsMatcher) Match(text string, prev Token) (Token, error) {
}

// CommentsDetect check.
func CommentsDetect(text string) (ok, more bool, err error) {
str := strings.TrimSpace(text)
//
// - inlineChars: #
//
// default match:
//
// - inline #, //
// - multi line: /*
func CommentsDetect(str string, inlineChars []byte) (ok, more bool, err error) {
ln := len(str)
if ln == 0 {
return
}

// a line comments
if str[0] == '#' || str[0] == '!' {
ok = true
return
// match inline comments by prefix char.
for _, prefix := range inlineChars {
if str[0] == prefix {
ok = true
return
}
}

// match start withs // OR /*
if str[0] == '/' {
if ln < 2 {
err = errorx.Rawf("invalid contents %q", str)
Expand Down

0 comments on commit 75780ae

Please sign in to comment.