From d9069cd4972d738e9f893b98ba6d9626d27abbd1 Mon Sep 17 00:00:00 2001 From: x1unix Date: Thu, 10 Sep 2020 07:27:10 +0300 Subject: [PATCH 1/2] refactor dotenv parser in order to support multi-line variable values declaration Signed-off-by: x1unix --- godotenv.go | 63 ++++++---------- parser.go | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+), 39 deletions(-) create mode 100644 parser.go diff --git a/godotenv.go b/godotenv.go index 69e816c..fc1a5f5 100644 --- a/godotenv.go +++ b/godotenv.go @@ -14,10 +14,10 @@ package godotenv import ( - "bufio" "errors" "fmt" "io" + "io/ioutil" "os" "os/exec" "regexp" @@ -27,6 +27,16 @@ import ( const doubleQuoteSpecialChars = "\\\n\r\"!$`" +// Parse reads an env file from io.Reader, returning a map of keys and values. +func Parse(r io.Reader) (map[string]string, error) { + data, err := ioutil.ReadAll(r) + if err != nil { + return nil, err + } + + return UnmarshalBytes(data) +} + // Load will read your env file(s) and load them into ENV for this process. // // Call this function as close as possible to the start of your program (ideally in main) @@ -95,37 +105,16 @@ func Read(filenames ...string) (envMap map[string]string, err error) { return } -// Parse reads an env file from io.Reader, returning a map of keys and values. -func Parse(r io.Reader) (envMap map[string]string, err error) { - envMap = make(map[string]string) - - var lines []string - scanner := bufio.NewScanner(r) - for scanner.Scan() { - lines = append(lines, scanner.Text()) - } - - if err = scanner.Err(); err != nil { - return - } - - for _, fullLine := range lines { - if !isIgnoredLine(fullLine) { - var key, value string - key, value, err = parseLine(fullLine, envMap) - - if err != nil { - return - } - envMap[key] = value - } - } - return +// Unmarshal reads an env file from a string, returning a map of keys and values. +func Unmarshal(str string) (envMap map[string]string, err error) { + return UnmarshalBytes([]byte(str)) } -//Unmarshal reads an env file from a string, returning a map of keys and values. -func Unmarshal(str string) (envMap map[string]string, err error) { - return Parse(strings.NewReader(str)) +// UnmarshalBytes parses env file from byte slice of chars, returning a map of keys and values. +func UnmarshalBytes(src []byte) (map[string]string, error) { + out := make(map[string]string) + err := parseBytes(src, out) + return out, err } // Exec loads env vars from the specified filenames (empty map falls back to default) @@ -136,7 +125,9 @@ func Unmarshal(str string) (envMap map[string]string, err error) { // If you want more fine grained control over your command it's recommended // that you use `Load()` or `Read()` and the `os/exec` package yourself. func Exec(filenames []string, cmd string, cmdArgs []string) error { - Load(filenames...) + if err := Load(filenames...); err != nil { + return err + } command := exec.Command(cmd, cmdArgs...) command.Stdin = os.Stdin @@ -160,8 +151,7 @@ func Write(envMap map[string]string, filename string) error { if err != nil { return err } - file.Sync() - return err + return file.Sync() } // Marshal outputs the given environment as a dotenv-formatted environment file. @@ -197,7 +187,7 @@ func loadFile(filename string, overload bool) error { for key, value := range envMap { if !currentEnv[key] || overload { - os.Setenv(key, value) + _ = os.Setenv(key, value) } } @@ -338,11 +328,6 @@ func expandVariables(v string, m map[string]string) string { }) } -func isIgnoredLine(line string) bool { - trimmedLine := strings.TrimSpace(line) - return len(trimmedLine) == 0 || strings.HasPrefix(trimmedLine, "#") -} - func doubleQuoteEscape(line string) string { for _, c := range doubleQuoteSpecialChars { toReplace := "\\" + string(c) diff --git a/parser.go b/parser.go new file mode 100644 index 0000000..e44d902 --- /dev/null +++ b/parser.go @@ -0,0 +1,206 @@ +package godotenv + +import ( + "bytes" + "errors" + "fmt" + "strings" + "unicode" +) + +const ( + charComment = '#' + prefixSingleQuote = '\'' + prefixDoubleQuote = '"' + + exportPrefix = "export" +) + +func parseBytes(src []byte, out map[string]string) error { + cutset := src + for { + cutset = getStatementStart(cutset) + if cutset == nil { + // reached end of file + break + } + + key, left, err := locateKeyName(cutset) + if err != nil { + return err + } + + value, left, err := extractVarValue(left, out) + if err != nil { + return err + } + + out[key] = value + cutset = left + } + + return nil +} + +// getStatementPosition returns position of statement begin. +// +// It skips any comment line or non-whitespace character. +func getStatementStart(src []byte) []byte { + pos := indexOfNonSpaceChar(src) + if pos == -1 { + return nil + } + + src = src[pos:] + if src[0] != charComment { + return src + } + + // skip comment section + pos = bytes.IndexFunc(src, isCharFunc('\n')) + if pos == -1 { + return nil + } + + return getStatementStart(src[pos:]) +} + +// locateKeyName locates and parses key name and returns rest of slice +func locateKeyName(src []byte) (key string, cutset []byte, err error) { + // trim "export" and space at beginning + src = bytes.TrimLeftFunc(bytes.TrimPrefix(src, []byte(exportPrefix)), isSpace) + + // locate key name end and validate it in single loop + offset := 0 +loop: + for i, char := range src { + rchar := rune(char) + if isSpace(rchar) { + continue + } + + switch char { + case '=', ':': + // library also supports yaml-style value declaration + key = string(src[0:i]) + offset = i + 1 + break loop + case '_': + default: + // variable name should match [A-Za-z0-9_] + if unicode.IsLetter(rchar) || unicode.IsNumber(rchar) { + continue + } + + return "", nil, fmt.Errorf( + `unexpected character %q in variable name near %q`, + string(char), string(src)) + } + } + + if len(src) == 0 { + return "", nil, errors.New("zero length string") + } + + // trim whitespace + key = strings.TrimRightFunc(key, unicode.IsSpace) + cutset = bytes.TrimLeftFunc(src[offset:], isSpace) + return key, cutset, nil +} + +// extractVarValue extracts variable value and returns rest of slice +func extractVarValue(src []byte, vars map[string]string) (value string, rest []byte, err error) { + quote, hasPrefix := hasQuotePrefix(src) + if !hasPrefix { + // unquoted value - read until whitespace + end := bytes.IndexFunc(src, unicode.IsSpace) + if end == -1 { + return expandVariables(string(src), vars), nil, nil + } + + return expandVariables(string(src[0:end]), vars), src[end:], nil + } + + // lookup quoted string terminator + for i := 1; i < len(src); i++ { + if char := src[i]; char != quote { + continue + } + + // skip escaped quote symbol (\" or \', depends on quote) + if prevChar := src[i-1]; prevChar == '\\' { + continue + } + + // trim quotes + trimFunc := isCharFunc(rune(quote)) + value = string(bytes.TrimLeftFunc(bytes.TrimRightFunc(src[0:i], trimFunc), trimFunc)) + if quote == prefixDoubleQuote { + // unescape newlines for double quote (this is compat feature) + // and expand environment variables + value = expandVariables(expandEscapes(value), vars) + } + + return value, src[i+1:], nil + } + + // return formatted error if quoted string is not terminated + valEndIndex := bytes.IndexFunc(src, isCharFunc('\n')) + if valEndIndex == -1 { + valEndIndex = len(src) + } + + return "", nil, fmt.Errorf("unterminated quoted value %s", src[:valEndIndex]) +} + +func expandEscapes(str string) string { + out := escapeRegex.ReplaceAllStringFunc(str, func(match string) string { + c := strings.TrimPrefix(match, `\`) + switch c { + case "n": + return "\n" + case "r": + return "\r" + default: + return match + } + }) + return unescapeCharsRegex.ReplaceAllString(out, "$1") +} + +func indexOfNonSpaceChar(src []byte) int { + return bytes.IndexFunc(src, func(r rune) bool { + return !unicode.IsSpace(r) + }) +} + +// hasQuotePrefix reports whether charset starts with single or double quote and returns quote character +func hasQuotePrefix(src []byte) (prefix byte, isQuored bool) { + if len(src) == 0 { + return 0, false + } + + switch prefix := src[0]; prefix { + case prefixDoubleQuote, prefixSingleQuote: + return prefix, true + default: + return 0, false + } +} + +func isCharFunc(char rune) func(rune) bool { + return func(v rune) bool { + return v == char + } +} + +// isSpace reports whether the rune is a space character but not line break character +// +// this differs from unicode.IsSpace, which also applies line break as space +func isSpace(r rune) bool { + switch r { + case '\t', '\v', '\f', '\r', ' ', 0x85, 0xA0: + return true + } + return false +} From 993ff7a3f12403df07686e7ea658caa07081e595 Mon Sep 17 00:00:00 2001 From: x1unix Date: Thu, 10 Sep 2020 07:27:38 +0300 Subject: [PATCH 2/2] Add multi-line var values test case and update comment test Signed-off-by: x1unix --- godotenv_test.go | 83 +++++++++++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 25 deletions(-) diff --git a/godotenv_test.go b/godotenv_test.go index d1f73cb..36f3b35 100644 --- a/godotenv_test.go +++ b/godotenv_test.go @@ -271,6 +271,34 @@ func TestExpanding(t *testing.T) { } +func TestVariableStringValueSeparator(t *testing.T) { + input := "TEST_URLS=\"stratum+tcp://stratum.antpool.com:3333\nstratum+tcp://stratum.antpool.com:443\"" + want := map[string]string{ + "TEST_URLS": "stratum+tcp://stratum.antpool.com:3333\nstratum+tcp://stratum.antpool.com:443", + } + got, err := Parse(strings.NewReader(input)) + if err != nil { + t.Error(err) + } + + if len(got) != len(want) { + t.Fatalf( + "unexpected value:\nwant:\n\t%#v\n\ngot:\n\t%#v", want, got) + } + + for k, wantVal := range want { + gotVal, ok := got[k] + if !ok { + t.Fatalf("key %q doesn't present in result", k) + } + if wantVal != gotVal { + t.Fatalf( + "mismatch in %q value:\nwant:\n\t%s\n\ngot:\n\t%s", k, + wantVal, gotVal) + } + } +} + func TestActualEnvVarsAreLeftAlone(t *testing.T) { os.Clearenv() os.Setenv("OPTION_A", "actualenv") @@ -377,33 +405,38 @@ func TestParsing(t *testing.T) { } func TestLinesToIgnore(t *testing.T) { - // it 'ignores empty lines' do - // expect(env("\n \t \nfoo=bar\n \nfizz=buzz")).to eql('foo' => 'bar', 'fizz' => 'buzz') - if !isIgnoredLine("\n") { - t.Error("Line with nothing but line break wasn't ignored") - } - - if !isIgnoredLine("\r\n") { - t.Error("Line with nothing but windows-style line break wasn't ignored") - } - - if !isIgnoredLine("\t\t ") { - t.Error("Line full of whitespace wasn't ignored") - } - - // it 'ignores comment lines' do - // expect(env("\n\n\n # HERE GOES FOO \nfoo=bar")).to eql('foo' => 'bar') - if !isIgnoredLine("# comment") { - t.Error("Comment wasn't ignored") - } - - if !isIgnoredLine("\t#comment") { - t.Error("Indented comment wasn't ignored") + cases := map[string]struct { + input string + want string + }{ + "Line with nothing but line break": { + input: "\n", + }, + "Line with nothing but windows-style line break": { + input: "\r\n", + }, + "Line full of whitespace": { + input: "\t\t ", + }, + "Comment": { + input: "# Comment", + }, + "Indented comment": { + input: "\t # comment", + }, + "non-ignored value": { + input: `export OPTION_B='\n'`, + want: `export OPTION_B='\n'`, + }, } - // make sure we're not getting false positives - if isIgnoredLine(`export OPTION_B='\n'`) { - t.Error("ignoring a perfectly valid line to parse") + for n, c := range cases { + t.Run(n, func(t *testing.T) { + got := string(getStatementStart([]byte(c.input))) + if got != c.want { + t.Errorf("Expected:\t %q\nGot:\t %q", c.want, got) + } + }) } }