From ae4f51b80c90afe97ecbe5bd1a35a4b01aa12a44 Mon Sep 17 00:00:00 2001 From: x1unix Date: Thu, 10 Sep 2020 07:27:10 +0300 Subject: [PATCH] refactor dotenv parser in order to support multi-line variable values declaration --- godotenv.go | 63 ++++++---------- parser.go | 206 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 230 insertions(+), 39 deletions(-) create mode 100644 parser.go diff --git a/godotenv.go b/godotenv.go index 69e816c..fc1a5f5 100644 --- a/godotenv.go +++ b/godotenv.go @@ -14,10 +14,10 @@ package godotenv import ( - "bufio" "errors" "fmt" "io" + "io/ioutil" "os" "os/exec" "regexp" @@ -27,6 +27,16 @@ import ( const doubleQuoteSpecialChars = "\\\n\r\"!$`" +// Parse reads an env file from io.Reader, returning a map of keys and values. +func Parse(r io.Reader) (map[string]string, error) { + data, err := ioutil.ReadAll(r) + if err != nil { + return nil, err + } + + return UnmarshalBytes(data) +} + // Load will read your env file(s) and load them into ENV for this process. // // Call this function as close as possible to the start of your program (ideally in main) @@ -95,37 +105,16 @@ func Read(filenames ...string) (envMap map[string]string, err error) { return } -// Parse reads an env file from io.Reader, returning a map of keys and values. -func Parse(r io.Reader) (envMap map[string]string, err error) { - envMap = make(map[string]string) - - var lines []string - scanner := bufio.NewScanner(r) - for scanner.Scan() { - lines = append(lines, scanner.Text()) - } - - if err = scanner.Err(); err != nil { - return - } - - for _, fullLine := range lines { - if !isIgnoredLine(fullLine) { - var key, value string - key, value, err = parseLine(fullLine, envMap) - - if err != nil { - return - } - envMap[key] = value - } - } - return +// Unmarshal reads an env file from a string, returning a map of keys and values. +func Unmarshal(str string) (envMap map[string]string, err error) { + return UnmarshalBytes([]byte(str)) } -//Unmarshal reads an env file from a string, returning a map of keys and values. -func Unmarshal(str string) (envMap map[string]string, err error) { - return Parse(strings.NewReader(str)) +// UnmarshalBytes parses env file from byte slice of chars, returning a map of keys and values. +func UnmarshalBytes(src []byte) (map[string]string, error) { + out := make(map[string]string) + err := parseBytes(src, out) + return out, err } // Exec loads env vars from the specified filenames (empty map falls back to default) @@ -136,7 +125,9 @@ func Unmarshal(str string) (envMap map[string]string, err error) { // If you want more fine grained control over your command it's recommended // that you use `Load()` or `Read()` and the `os/exec` package yourself. func Exec(filenames []string, cmd string, cmdArgs []string) error { - Load(filenames...) + if err := Load(filenames...); err != nil { + return err + } command := exec.Command(cmd, cmdArgs...) command.Stdin = os.Stdin @@ -160,8 +151,7 @@ func Write(envMap map[string]string, filename string) error { if err != nil { return err } - file.Sync() - return err + return file.Sync() } // Marshal outputs the given environment as a dotenv-formatted environment file. @@ -197,7 +187,7 @@ func loadFile(filename string, overload bool) error { for key, value := range envMap { if !currentEnv[key] || overload { - os.Setenv(key, value) + _ = os.Setenv(key, value) } } @@ -338,11 +328,6 @@ func expandVariables(v string, m map[string]string) string { }) } -func isIgnoredLine(line string) bool { - trimmedLine := strings.TrimSpace(line) - return len(trimmedLine) == 0 || strings.HasPrefix(trimmedLine, "#") -} - func doubleQuoteEscape(line string) string { for _, c := range doubleQuoteSpecialChars { toReplace := "\\" + string(c) diff --git a/parser.go b/parser.go new file mode 100644 index 0000000..e44d902 --- /dev/null +++ b/parser.go @@ -0,0 +1,206 @@ +package godotenv + +import ( + "bytes" + "errors" + "fmt" + "strings" + "unicode" +) + +const ( + charComment = '#' + prefixSingleQuote = '\'' + prefixDoubleQuote = '"' + + exportPrefix = "export" +) + +func parseBytes(src []byte, out map[string]string) error { + cutset := src + for { + cutset = getStatementStart(cutset) + if cutset == nil { + // reached end of file + break + } + + key, left, err := locateKeyName(cutset) + if err != nil { + return err + } + + value, left, err := extractVarValue(left, out) + if err != nil { + return err + } + + out[key] = value + cutset = left + } + + return nil +} + +// getStatementPosition returns position of statement begin. +// +// It skips any comment line or non-whitespace character. +func getStatementStart(src []byte) []byte { + pos := indexOfNonSpaceChar(src) + if pos == -1 { + return nil + } + + src = src[pos:] + if src[0] != charComment { + return src + } + + // skip comment section + pos = bytes.IndexFunc(src, isCharFunc('\n')) + if pos == -1 { + return nil + } + + return getStatementStart(src[pos:]) +} + +// locateKeyName locates and parses key name and returns rest of slice +func locateKeyName(src []byte) (key string, cutset []byte, err error) { + // trim "export" and space at beginning + src = bytes.TrimLeftFunc(bytes.TrimPrefix(src, []byte(exportPrefix)), isSpace) + + // locate key name end and validate it in single loop + offset := 0 +loop: + for i, char := range src { + rchar := rune(char) + if isSpace(rchar) { + continue + } + + switch char { + case '=', ':': + // library also supports yaml-style value declaration + key = string(src[0:i]) + offset = i + 1 + break loop + case '_': + default: + // variable name should match [A-Za-z0-9_] + if unicode.IsLetter(rchar) || unicode.IsNumber(rchar) { + continue + } + + return "", nil, fmt.Errorf( + `unexpected character %q in variable name near %q`, + string(char), string(src)) + } + } + + if len(src) == 0 { + return "", nil, errors.New("zero length string") + } + + // trim whitespace + key = strings.TrimRightFunc(key, unicode.IsSpace) + cutset = bytes.TrimLeftFunc(src[offset:], isSpace) + return key, cutset, nil +} + +// extractVarValue extracts variable value and returns rest of slice +func extractVarValue(src []byte, vars map[string]string) (value string, rest []byte, err error) { + quote, hasPrefix := hasQuotePrefix(src) + if !hasPrefix { + // unquoted value - read until whitespace + end := bytes.IndexFunc(src, unicode.IsSpace) + if end == -1 { + return expandVariables(string(src), vars), nil, nil + } + + return expandVariables(string(src[0:end]), vars), src[end:], nil + } + + // lookup quoted string terminator + for i := 1; i < len(src); i++ { + if char := src[i]; char != quote { + continue + } + + // skip escaped quote symbol (\" or \', depends on quote) + if prevChar := src[i-1]; prevChar == '\\' { + continue + } + + // trim quotes + trimFunc := isCharFunc(rune(quote)) + value = string(bytes.TrimLeftFunc(bytes.TrimRightFunc(src[0:i], trimFunc), trimFunc)) + if quote == prefixDoubleQuote { + // unescape newlines for double quote (this is compat feature) + // and expand environment variables + value = expandVariables(expandEscapes(value), vars) + } + + return value, src[i+1:], nil + } + + // return formatted error if quoted string is not terminated + valEndIndex := bytes.IndexFunc(src, isCharFunc('\n')) + if valEndIndex == -1 { + valEndIndex = len(src) + } + + return "", nil, fmt.Errorf("unterminated quoted value %s", src[:valEndIndex]) +} + +func expandEscapes(str string) string { + out := escapeRegex.ReplaceAllStringFunc(str, func(match string) string { + c := strings.TrimPrefix(match, `\`) + switch c { + case "n": + return "\n" + case "r": + return "\r" + default: + return match + } + }) + return unescapeCharsRegex.ReplaceAllString(out, "$1") +} + +func indexOfNonSpaceChar(src []byte) int { + return bytes.IndexFunc(src, func(r rune) bool { + return !unicode.IsSpace(r) + }) +} + +// hasQuotePrefix reports whether charset starts with single or double quote and returns quote character +func hasQuotePrefix(src []byte) (prefix byte, isQuored bool) { + if len(src) == 0 { + return 0, false + } + + switch prefix := src[0]; prefix { + case prefixDoubleQuote, prefixSingleQuote: + return prefix, true + default: + return 0, false + } +} + +func isCharFunc(char rune) func(rune) bool { + return func(v rune) bool { + return v == char + } +} + +// isSpace reports whether the rune is a space character but not line break character +// +// this differs from unicode.IsSpace, which also applies line break as space +func isSpace(r rune) bool { + switch r { + case '\t', '\v', '\f', '\r', ' ', 0x85, 0xA0: + return true + } + return false +}