From 5dbc2f43d8b7fa89b46419dc2846bc32826de384 Mon Sep 17 00:00:00 2001 From: Inhere Date: Sat, 15 Oct 2022 20:54:32 +0800 Subject: [PATCH] up: use the goutil/strutil/textscan refator the parse logic --- README.md | 17 +++-- README.zh-CN.md | 18 ++++- dotenv/dotenv.go | 1 + dotenv/dotenv_test.go | 1 + parser/options.go | 4 +- parser/parser.go | 170 +++++++++++++++++++----------------------- parser/parser_test.go | 55 +++++++++++--- testdata/export.ini | 2 +- 8 files changed, 155 insertions(+), 113 deletions(-) diff --git a/README.md b/README.md index 353ec94..3936fab 100644 --- a/README.md +++ b/README.md @@ -15,13 +15,20 @@ INI contents parser by golang, INI config data management library. - Easy to use(get: `Int` `Int64` `Bool` `String` `StringMap` ..., set: `Set`) - Support multi file, data load -- Support for rebinding data to structure +- Support for decode data to struct - Support data override merge - Support parse ENV variable - Support comments start with `;` `#` - Complete unit test(coverage > 90%) - Support variable reference, default compatible with Python's configParser format `%(VAR)s` -- Sub-package `dotenv` that supports importing ENV data from files (eg `.env`) + +### [Parser](./parser) + +Package `parser` is a Parser for parse INI format content to golang data + +### [Dotenv](./dotenv) + +Package `dotenv` that supports importing ENV data from files (eg `.env`) ## More formats @@ -219,17 +226,17 @@ type Options struct { } ``` -- setting options for default instance +Setting options for default instance: ```go ini.WithOptions(ini.ParseEnv,ini.ParseVar) ``` -- setting options with new instance +Setting options with new instance: ```go cfg := ini.New() -cfg.WithOptions(ini.ParseEnv,ini.ParseVar, func (opts *Options) { +cfg.WithOptions(ini.ParseEnv, ini.ParseVar, func (opts *Options) { opts.SectionSep = ":" opts.DefSection = "default" }) diff --git a/README.zh-CN.md b/README.zh-CN.md index 94fa059..93f5336 100644 --- a/README.zh-CN.md +++ b/README.zh-CN.md @@ -11,15 +11,25 @@ INI格式内容解析; 使用INI格式作为配置,配置数据的加载,管 > **[EN README](README.md)** +## 功能简介 + - 使用简单(获取: `Int` `Int64` `Bool` `String` `StringMap` ..., 设置: `Set` ) - 支持多文件,数据加载 - 支持数据覆盖合并 -- 支持将数据重新绑定到结构体 -- 支持解析 ENV 变量名 +- 支持将数据绑定到结构体 +- 支持解析 `ENV` 变量名 - 支持使用 `;` `#` 注释一行 -- 支持变量参考,默认兼容Python的configParser格式 `%(VAR)s` +- 支持变量参考引用 + - 默认兼容 Python 的 configParser 格式 `%(VAR)s` - 完善的单元测试(coverage > 90%) -- 子包 `dotenv` - 提供了加载解析 `.env` 文件数据为ENV环境变量 + +### [Parser](./parser) + +子包 `parser` - 实现了解析 `INI` 格式内容为 Go 数据 + +### [Dotenv](./dotenv) + +子包 `dotenv` - 提供了加载解析 `.env` 文件数据为ENV环境变量 ## 更多格式 diff --git a/dotenv/dotenv.go b/dotenv/dotenv.go index 7e220bb..ac324fe 100644 --- a/dotenv/dotenv.go +++ b/dotenv/dotenv.go @@ -169,6 +169,7 @@ func getVal(name string) (val string, ok bool) { // cached if val = loadedData[name]; val != "" { + ok = true return } diff --git a/dotenv/dotenv_test.go b/dotenv/dotenv_test.go index adac99b..1b9dc58 100644 --- a/dotenv/dotenv_test.go +++ b/dotenv/dotenv_test.go @@ -39,6 +39,7 @@ func TestLoadFiles(t *testing.T) { err := LoadFiles("./testdata/.env") assert.NoErr(t, err) + assert.NotEmpty(t, LoadedData()) assert.Eq(t, "blog", os.Getenv("DONT_ENV_TEST")) assert.Eq(t, "blog", Get("DONT_ENV_TEST")) } diff --git a/parser/options.go b/parser/options.go index d627590..7f32449 100644 --- a/parser/options.go +++ b/parser/options.go @@ -2,8 +2,8 @@ package parser // mode of parse data // -// ModeFull - will parse inline array -// ModeLite/ModeSimple - don't parse array value +// ModeFull - will parse array value and inline array +// ModeLite/ModeSimple - don't parse array value line const ( ModeFull parseMode = 1 ModeLite parseMode = 2 diff --git a/parser/parser.go b/parser/parser.go index 29ecc95..414f25f 100644 --- a/parser/parser.go +++ b/parser/parser.go @@ -39,48 +39,38 @@ import ( "regexp" "strings" + "github.com/gookit/goutil/strutil/textscan" "github.com/mitchellh/mapstructure" ) -// errSyntax is returned when there is a syntax error in an INI file. -type errSyntax struct { - Line int - // Source The contents of the erroneous line, without leading or trailing whitespace - Source string -} +// match: [section] +var sectionRegex = regexp.MustCompile(`^\[(.*)]$`) -// Error message return -func (e errSyntax) Error() string { - return fmt.Sprintf("invalid INI syntax on line %d: %s", e.Line, e.Source) -} +// TokSection for mark a section +const TokSection = textscan.TokComments + 1 + iota -var ( - // match: [section] - sectionRegex = regexp.MustCompile(`^\[(.*)]$`) - // match: foo[] = val - assignArrRegex = regexp.MustCompile(`^([^=\[\]]+)\[][^=]*=(.*)$`) - // match: key = val - assignRegex = regexp.MustCompile(`^([^=]+)=(.*)$`) - // quote ' " - quotesRegex = regexp.MustCompile(`^(['"])(.*)(['"])$`) -) +// SectionMatcher match section line: [section] +type SectionMatcher struct{} -// special chars consts -const ( - MultiLineValMarkS = "'''" - MultiLineValMarkD = `"""` -) +// Match section line: [section] +func (m *SectionMatcher) Match(text string, prev textscan.Token) (textscan.Token, error) { + line := strings.TrimSpace(text) -// token consts -const ( - TokMLValMarkS = 'm' // multi line value by single quotes: ''' - TokMLValMarkD = 'M' // multi line value by double quotes: """ -) + if matched := sectionRegex.FindStringSubmatch(line); matched != nil { + section := strings.TrimSpace(matched[1]) + tok := textscan.NewStringToken(TokSection, section) + return tok, nil + } + + return nil, nil +} // Parser definition type Parser struct { *Options // parsed bool + // comments map, key is name + comments map[string]string // for full parse(allow array, map section) fullData map[string]any @@ -185,6 +175,7 @@ func (p *Parser) init() { // if p.IgnoreCase { // p.DefSection = strings.ToLower(p.DefSection) // } + p.comments = make(map[string]string) if p.ParseMode == ModeFull { p.fullData = make(map[string]any) @@ -202,56 +193,72 @@ func (p *Parser) init() { } // ParseFrom a data scanner -func (p *Parser) ParseFrom(in *bufio.Scanner) (bytes int64, err error) { +func (p *Parser) ParseFrom(in *bufio.Scanner) (count int64, err error) { p.init() + count = -1 + + // create scanner + ts := textscan.NewScanner(in) + ts.AddKind(TokSection, "Section") + ts.AddMatchers( + &textscan.CommentsMatcher{ + InlineChars: []byte{'#', ';'}, + }, + &SectionMatcher{}, + &textscan.KeyValueMatcher{ + MergeComments: true, + InlineComment: p.InlineComment, + }, + ) - bytes = -1 - lineNum := 0 section := p.DefSection - var readOk bool - for readOk = in.Scan(); readOk; readOk = in.Scan() { - line := in.Text() - - bytes++ // newline - bytes += int64(len(line)) + // scan and parsing + for ts.Scan() { + tok := ts.Token() - lineNum++ - line = strings.TrimSpace(line) - if len(line) == 0 { // Skip blank lines + // comments has been merged to value token + if !tok.IsValid() || tok.Kind() == textscan.TokComments { continue } - if line[0] == ';' || line[0] == '#' { // Skip comments + if tok.Kind() == TokSection { + section = tok.Value() + + // collect comments + if textscan.IsKindToken(textscan.TokComments, ts.PrevToken()) { + p.comments["_sec_"+section] = ts.PrevToken().Value() + } continue } - // array/slice data - if matched := assignArrRegex.FindStringSubmatch(line); matched != nil { - // skip array parse on lite mode - if p.ParseMode == ModeLite { - continue - } + // collect value + if tok.Kind() == textscan.TokValue { + vt := tok.(*textscan.ValueToken) - key, val := strings.TrimSpace(matched[1]), trimWithQuotes(matched[2]) + var isSli bool + key := vt.Key() - p.collectValue(section, key, val, true) - } else if matched := assignRegex.FindStringSubmatch(line); matched != nil { - key, val := strings.TrimSpace(matched[1]), trimWithQuotes(matched[2]) + // is array index + if strings.HasSuffix(key, "[]") { + // skip parse array on lite mode + if p.ParseMode == ModeLite { + continue + } - p.collectValue(section, key, val, false) - } else if matched := sectionRegex.FindStringSubmatch(line); matched != nil { - section = strings.TrimSpace(matched[1]) - } else { - err = errSyntax{lineNum, line} - return + key = key[:len(key)-2] + isSli = true + } + + p.collectValue(section, key, vt.Value(), isSli) + if vt.HasComment() { + p.comments[section+"_"+key] = vt.Comment() + } } } - err = in.Err() - if bytes < 0 { - bytes = 0 - } + count = 0 + err = ts.Err() return } @@ -261,15 +268,11 @@ func (p *Parser) collectValue(section, key, val string, isSlice bool) { section = strings.ToLower(section) } - if p.InlineComment { - val, _ = splitInlineComment(val) - } - if p.ReplaceNl { val = strings.ReplaceAll(val, `\n`, "\n") } - p.Collector(section, key, val, false) + p.Collector(section, key, val, isSlice) } func (p *Parser) collectFullValue(section, key, val string, isSlice bool) { @@ -305,8 +308,7 @@ func (p *Parser) collectFullValue(section, key, val string, isSlice bool) { switch sd := secData.(type) { case map[string]any: // existed section - curVal, ok := sd[key] - if ok { + if curVal, ok := sd[key]; ok { switch cv := curVal.(type) { case string: if isSlice { @@ -351,22 +353,11 @@ func (p *Parser) collectLiteValue(sec, key, val string, _ bool) { } } -func splitInlineComment(val string) (string, string) { - if pos := strings.IndexRune(val, '#'); pos > -1 { - return strings.TrimRight(val[0:pos], " "), val[pos:] - } - - if pos := strings.Index(val, "//"); pos > -1 { - return strings.TrimRight(val[0:pos], " "), val[pos:] - } - return val, "" -} - /************************************************************* * export data *************************************************************/ -// Decode mapping the parsed data to struct ptr +// Decode the parsed data to struct ptr func (p *Parser) Decode(ptr any) error { return p.MapStruct(ptr) } @@ -436,6 +427,11 @@ func mapStruct(tagName string, data any, ptr any) error { * helper methods *************************************************************/ +// Comments get +func (p *Parser) Comments() map[string]string { + return p.comments +} + // ParsedData get parsed data func (p *Parser) ParsedData() interface{} { if p.ParseMode == ModeFull { @@ -473,13 +469,3 @@ func (p *Parser) Reset() { p.liteData = make(map[string]map[string]string) } } - -func trimWithQuotes(inputVal string) (filtered string) { - filtered = strings.TrimSpace(inputVal) - groups := quotesRegex.FindStringSubmatch(filtered) - - if len(groups) > 2 && groups[1] == groups[3] { - filtered = groups[2] - } - return -} diff --git a/parser/parser_test.go b/parser/parser_test.go index b0caac3..72c77af 100644 --- a/parser/parser_test.go +++ b/parser/parser_test.go @@ -1,15 +1,18 @@ package parser import ( + "bufio" "fmt" + "strings" "testing" "github.com/gookit/goutil/dump" + "github.com/gookit/goutil/strutil/textscan" "github.com/gookit/goutil/testutil/assert" ) var iniStr = ` -# comments +# comments 1 name = inhere age = 28 debug = true @@ -18,17 +21,18 @@ hasQuota2 = "this is val1" shell = ${SHELL} noEnv = ${NotExist|defValue} -; array in def section +; array in default section tags[] = a tags[] = b tags[] = c -; comments +; comments 2 [sec1] key = val0 some = value stuff = things -; array in section + +; array in section sec1 types[] = x types[] = y ` @@ -105,7 +109,7 @@ two_words = abc def is.Eq("[a b]", fmt.Sprintf("%v", data["arr"])) is.Eq("map[key:val]", fmt.Sprintf("%v", data["sec"])) - st := struct { + type myConf struct { Age int Name string Sec1 struct { @@ -113,10 +117,27 @@ two_words = abc def Number int TwoWords string `ini:"two_words"` } - }{} + } - is.Nil(Decode(bts, &st)) + st := &myConf{} + is.NoErr(Decode(bts, st)) + is.Eq(23, st.Age) + is.Eq("inhere", st.Name) + is.Eq(2020, st.Sec1.Number) + is.Eq("abc def", st.Sec1.TwoWords) dump.P(st) + + // Unmarshal + p := NewLite(func(opt *Options) { + opt.NoDefSection = true + }) + + st = &myConf{} + is.NoErr(p.Unmarshal(bts, st)) + is.Eq(23, st.Age) + is.Eq("inhere", st.Name) + is.Eq(2020, st.Sec1.Number) + is.Eq("abc def", st.Sec1.TwoWords) } func TestNewSimpled(t *testing.T) { @@ -131,8 +152,9 @@ func TestNewSimpled(t *testing.T) { err := p.ParseString("invalid string") is.Err(err) - is.IsType(errSyntax{}, err) - is.Contains(err.Error(), "invalid INI syntax on line") + is.IsType(textscan.ErrScan{}, err) + // is.Contains(err.Error(), "invalid syntax, no matcher available") + is.Contains(err.Error(), "line 1: invalid string") err = p.ParseString("") is.NoErr(err) @@ -143,6 +165,7 @@ func TestNewSimpled(t *testing.T) { is.Nil(err) data := p.SimpleData() + dump.P(data, p.Comments()) str := fmt.Sprintf("%v", data) is.Contains(str, "hasQuota2:") is.NotContains(str, "hasquota1:") @@ -191,7 +214,9 @@ key = val0 is.Nil(err) v := p.ParsedData() + dump.P(v, p.Comments()) is.NotEmpty(v) + is.ContainsKey(v, "sec1") // options: ignore case p = NewFulled(IgnoreCase) @@ -218,6 +243,13 @@ func TestParser_ParseBytes(t *testing.T) { is.Len(p.LiteData(), 0) } +func TestParser_ParseFrom(t *testing.T) { + p := New() + n, err := p.ParseFrom(bufio.NewScanner(strings.NewReader(""))) + assert.Eq(t, int64(0), n) + assert.NoErr(t, err) +} + func TestParser_ParseString(t *testing.T) { p := New(WithParseMode(ModeFull)) err := p.ParseString(` @@ -230,4 +262,9 @@ arr[] = val4 assert.NoErr(t, err) assert.NotEmpty(t, p.fullData) dump.P(p.ParsedData()) + + p.Reset() + assert.NoErr(t, p.ParseString(` +# no values +`)) } diff --git a/testdata/export.ini b/testdata/export.ini index 81046f2..5d2f7aa 100644 --- a/testdata/export.ini +++ b/testdata/export.ini @@ -1,4 +1,4 @@ -# exported at 2022-09-17 14:55:16 +# exported at 2022-10-15 20:43:25 # values for default section age = 28