Skip to content

Commit

Permalink
add unified way to validate river identifiers (#18)
Browse files Browse the repository at this point in the history
* add unified way to validate river identifiers
* add an opinionated way to sanitize a string into a river identifier.

Signed-off-by: erikbaranowski <39704712+erikbaranowski@users.noreply.github.com>

---------

Signed-off-by: erikbaranowski <39704712+erikbaranowski@users.noreply.github.com>
  • Loading branch information
erikbaranowski committed Aug 30, 2023
1 parent 93eb6c4 commit b2f05be
Show file tree
Hide file tree
Showing 4 changed files with 154 additions and 14 deletions.
8 changes: 1 addition & 7 deletions parser/internal.go
Original file line number Diff line number Diff line change
Expand Up @@ -344,7 +344,7 @@ func (p *parser) parseBlockName() *blockName {
// label to be a valid identifier.
if len(p.lit) > 2 {
bn.Label = p.lit[1 : len(p.lit)-1]
if !isValidIdentifier(bn.Label) {
if !scanner.IsValidIdentifier(bn.Label) {
p.addErrorf("expected block label to be a valid identifier, but got %q", bn.Label)
}
}
Expand Down Expand Up @@ -707,9 +707,3 @@ func (p *parser) parseField() *ast.ObjectField {
field.Value = p.ParseExpression()
return &field
}

func isValidIdentifier(in string) bool {
s := scanner.New(nil, []byte(in), nil, 0)
_, tok, lit := s.Scan()
return tok == token.IDENT && lit == in
}
60 changes: 60 additions & 0 deletions scanner/identifier.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
package scanner

import (
"fmt"

"github.com/grafana/river/token"
)

// IsValidIdentifier returns true if the given string is a valid river
// identifier.
func IsValidIdentifier(in string) bool {
s := New(token.NewFile(""), []byte(in), nil, 0)
_, tok, lit := s.Scan()
return tok == token.IDENT && lit == in
}

// SanitizeIdentifier will return the given string mutated into a valid river
// identifier. If the given string is already a valid identifier, it will be
// returned unchanged.
//
// This should be used with caution since the different inputs can result in
// identical outputs.
func SanitizeIdentifier(in string) (string, error) {
if in == "" {
return "", fmt.Errorf("cannot generate a new identifier for an empty string")
}

if IsValidIdentifier(in) {
return in, nil
}

newValue := generateNewIdentifier(in)
if !IsValidIdentifier(newValue) {
panic(fmt.Errorf("invalid identifier %q generated for `%q`", newValue, in))
}

return newValue, nil
}

// generateNewIdentifier expects a valid river prefix and replacement
// string and returns a new identifier based on the given input.
func generateNewIdentifier(in string) string {
newValue := ""
for i, c := range in {
if i == 0 {
if isDigit(c) {
newValue = "_"
}
}

if !(isLetter(c) || isDigit(c)) {
newValue += "_"
continue
}

newValue += string(c)
}

return newValue
}
92 changes: 92 additions & 0 deletions scanner/identifier_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
package scanner_test

import (
"testing"

"github.com/grafana/river/scanner"
"github.com/stretchr/testify/require"
)

var validTestCases = []struct {
name string
identifier string
expect bool
}{
{"empty", "", false},
{"start_number", "0identifier_1", false},
{"start_char", "identifier_1", true},
{"start_underscore", "_identifier_1", true},
{"special_chars", "!@#$%^&*()", false},
{"special_char", "identifier_1!", false},
{"spaces", "identifier _ 1", false},
}

func TestIsValidIdentifier(t *testing.T) {
for _, tc := range validTestCases {
t.Run(tc.name, func(t *testing.T) {
require.Equal(t, tc.expect, scanner.IsValidIdentifier(tc.identifier))
})
}
}

func BenchmarkIsValidIdentifier(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, tc := range validTestCases {
_ = scanner.IsValidIdentifier(tc.identifier)
}
}
}

var sanitizeTestCases = []struct {
name string
identifier string
expectIdentifier string
expectErr string
}{
{"empty", "", "", "cannot generate a new identifier for an empty string"},
{"start_number", "0identifier_1", "_0identifier_1", ""},
{"start_char", "identifier_1", "identifier_1", ""},
{"start_underscore", "_identifier_1", "_identifier_1", ""},
{"special_chars", "!@#$%^&*()", "__________", ""},
{"special_char", "identifier_1!", "identifier_1_", ""},
{"spaces", "identifier _ 1", "identifier___1", ""},
}

func TestSanitizeIdentifier(t *testing.T) {
for _, tc := range sanitizeTestCases {
t.Run(tc.name, func(t *testing.T) {
newIdentifier, err := scanner.SanitizeIdentifier(tc.identifier)
if tc.expectErr != "" {
require.EqualError(t, err, tc.expectErr)
return
}

require.NoError(t, err)
require.Equal(t, tc.expectIdentifier, newIdentifier)
})
}
}

func BenchmarkSanitizeIdentifier(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, tc := range sanitizeTestCases {
_, _ = scanner.SanitizeIdentifier(tc.identifier)
}
}
}

func FuzzSanitizeIdentifier(f *testing.F) {
for _, tc := range sanitizeTestCases {
f.Add(tc.identifier)
}

f.Fuzz(func(t *testing.T, input string) {
newIdentifier, err := scanner.SanitizeIdentifier(input)
if input == "" {
require.EqualError(t, err, "cannot generate a new identifier for an empty string")
return
}
require.NoError(t, err)
require.True(t, scanner.IsValidIdentifier(newIdentifier))
})
}
8 changes: 1 addition & 7 deletions token/builder/value_tokens.go
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ func valueTokens(v value.Value) []Token {
}

for i := 0; i < len(keys); i++ {
if isValidIdentifier(keys[i]) {
if scanner.IsValidIdentifier(keys[i]) {
toks = append(toks, Token{token.IDENT, keys[i]})
} else {
toks = append(toks, Token{token.STRING, fmt.Sprintf("%q", keys[i])})
Expand All @@ -93,9 +93,3 @@ func valueTokens(v value.Value) []Token {

return toks
}

func isValidIdentifier(in string) bool {
s := scanner.New(nil, []byte(in), nil, 0)
_, tok, lit := s.Scan()
return tok == token.IDENT && lit == in
}

0 comments on commit b2f05be

Please sign in to comment.