add unified way to validate river identifiers (#18)

* add unified way to validate river identifiers * add an opinionated way to sanitize a string into a river identifier. Signed-off-by: erikbaranowski <39704712+erikbaranowski@users.noreply.github.com> --------- Signed-off-by: erikbaranowski <39704712+erikbaranowski@users.noreply.github.com>
grafana · Aug 30, 2023 · b2f05be · b2f05be
1 parent 93eb6c4
commit b2f05be
Show file tree

Hide file tree

Showing 4 changed files with 154 additions and 14 deletions.
diff --git a/parser/internal.go b/parser/internal.go
@@ -344,7 +344,7 @@ func (p *parser) parseBlockName() *blockName {
 			// label to be a valid identifier.
 			if len(p.lit) > 2 {
 				bn.Label = p.lit[1 : len(p.lit)-1]
-				if !isValidIdentifier(bn.Label) {
+				if !scanner.IsValidIdentifier(bn.Label) {
 					p.addErrorf("expected block label to be a valid identifier, but got %q", bn.Label)
 				}
 			}
@@ -707,9 +707,3 @@ func (p *parser) parseField() *ast.ObjectField {
 	field.Value = p.ParseExpression()
 	return &field
 }
-
-func isValidIdentifier(in string) bool {
-	s := scanner.New(nil, []byte(in), nil, 0)
-	_, tok, lit := s.Scan()
-	return tok == token.IDENT && lit == in
-}
diff --git a/scanner/identifier.go b/scanner/identifier.go
@@ -0,0 +1,60 @@
+package scanner
+
+import (
+	"fmt"
+
+	"github.com/grafana/river/token"
+)
+
+// IsValidIdentifier returns true if the given string is a valid river
+// identifier.
+func IsValidIdentifier(in string) bool {
+	s := New(token.NewFile(""), []byte(in), nil, 0)
+	_, tok, lit := s.Scan()
+	return tok == token.IDENT && lit == in
+}
+
+// SanitizeIdentifier will return the given string mutated into a valid river
+// identifier. If the given string is already a valid identifier, it will be
+// returned unchanged.
+//
+// This should be used with caution since the different inputs can result in
+// identical outputs.
+func SanitizeIdentifier(in string) (string, error) {
+	if in == "" {
+		return "", fmt.Errorf("cannot generate a new identifier for an empty string")
+	}
+
+	if IsValidIdentifier(in) {
+		return in, nil
+	}
+
+	newValue := generateNewIdentifier(in)
+	if !IsValidIdentifier(newValue) {
+		panic(fmt.Errorf("invalid identifier %q generated for `%q`", newValue, in))
+	}
+
+	return newValue, nil
+}
+
+// generateNewIdentifier expects a valid river prefix and replacement
+// string and returns a new identifier based on the given input.
+func generateNewIdentifier(in string) string {
+	newValue := ""
+	for i, c := range in {
+		if i == 0 {
+			if isDigit(c) {
+				newValue = "_"
+			}
+		}
+
+		if !(isLetter(c) || isDigit(c)) {
+			newValue += "_"
+			continue
+		}
+
+		newValue += string(c)
+	}
+
+	return newValue
+}
diff --git a/scanner/identifier_test.go b/scanner/identifier_test.go
@@ -0,0 +1,92 @@
+package scanner_test
+
+import (
+	"testing"
+
+	"github.com/grafana/river/scanner"
+	"github.com/stretchr/testify/require"
+)
+
+var validTestCases = []struct {
+	name       string
+	identifier string
+	expect     bool
+}{
+	{"empty", "", false},
+	{"start_number", "0identifier_1", false},
+	{"start_char", "identifier_1", true},
+	{"start_underscore", "_identifier_1", true},
+	{"special_chars", "!@#$%^&*()", false},
+	{"special_char", "identifier_1!", false},
+	{"spaces", "identifier _ 1", false},
+}
+
+func TestIsValidIdentifier(t *testing.T) {
+	for _, tc := range validTestCases {
+		t.Run(tc.name, func(t *testing.T) {
+			require.Equal(t, tc.expect, scanner.IsValidIdentifier(tc.identifier))
+		})
+	}
+}
+
+func BenchmarkIsValidIdentifier(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		for _, tc := range validTestCases {
+			_ = scanner.IsValidIdentifier(tc.identifier)
+		}
+	}
+}
+
+var sanitizeTestCases = []struct {
+	name             string
+	identifier       string
+	expectIdentifier string
+	expectErr        string
+}{
+	{"empty", "", "", "cannot generate a new identifier for an empty string"},
+	{"start_number", "0identifier_1", "_0identifier_1", ""},
+	{"start_char", "identifier_1", "identifier_1", ""},
+	{"start_underscore", "_identifier_1", "_identifier_1", ""},
+	{"special_chars", "!@#$%^&*()", "__________", ""},
+	{"special_char", "identifier_1!", "identifier_1_", ""},
+	{"spaces", "identifier _ 1", "identifier___1", ""},
+}
+
+func TestSanitizeIdentifier(t *testing.T) {
+	for _, tc := range sanitizeTestCases {
+		t.Run(tc.name, func(t *testing.T) {
+			newIdentifier, err := scanner.SanitizeIdentifier(tc.identifier)
+			if tc.expectErr != "" {
+				require.EqualError(t, err, tc.expectErr)
+				return
+			}
+
+			require.NoError(t, err)
+			require.Equal(t, tc.expectIdentifier, newIdentifier)
+		})
+	}
+}
+
+func BenchmarkSanitizeIdentifier(b *testing.B) {
+	for i := 0; i < b.N; i++ {
+		for _, tc := range sanitizeTestCases {
+			_, _ = scanner.SanitizeIdentifier(tc.identifier)
+		}
+	}
+}
+
+func FuzzSanitizeIdentifier(f *testing.F) {
+	for _, tc := range sanitizeTestCases {
+		f.Add(tc.identifier)
+	}
+
+	f.Fuzz(func(t *testing.T, input string) {
+		newIdentifier, err := scanner.SanitizeIdentifier(input)
+		if input == "" {
+			require.EqualError(t, err, "cannot generate a new identifier for an empty string")
+			return
+		}
+		require.NoError(t, err)
+		require.True(t, scanner.IsValidIdentifier(newIdentifier))
+	})
+}
diff --git a/token/builder/value_tokens.go b/token/builder/value_tokens.go
@@ -68,7 +68,7 @@ func valueTokens(v value.Value) []Token {
 		}
 
 		for i := 0; i < len(keys); i++ {
-			if isValidIdentifier(keys[i]) {
+			if scanner.IsValidIdentifier(keys[i]) {
 				toks = append(toks, Token{token.IDENT, keys[i]})
 			} else {
 				toks = append(toks, Token{token.STRING, fmt.Sprintf("%q", keys[i])})
@@ -93,9 +93,3 @@ func valueTokens(v value.Value) []Token {
 
 	return toks
 }
-
-func isValidIdentifier(in string) bool {
-	s := scanner.New(nil, []byte(in), nil, 0)
-	_, tok, lit := s.Scan()
-	return tok == token.IDENT && lit == in
-}