add lots more tests, clean up tests a bit with tn and tt shortcuts, report delim balancing errors in the lexer, fix a potential issue where delims clash with the block delims
jmoiron committed May 11, 2014
1 parent fa8e0b9 commit c461e11
Showing 3 changed files with 158 additions and 48 deletions.
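The heart of the lexer change is a classic bracket-matching stack: every opening delimiter pushes the closer it expects, and every closing delimiter must match the top of the stack. A minimal standalone sketch of the technique (illustrative names only, not jigo's actual API; the lexer below emits an error item via errorf instead of returning an error):

package main

import "fmt"

// checkBalanced is a toy version of the delim stack this commit adds:
// each opener pushes its expected closer; each closer must match the
// top of the stack or the input is rejected.
func checkBalanced(input string) error {
	stack := make([]rune, 0, 10)
	for _, r := range input {
		switch r {
		case '(':
			stack = append(stack, ')')
		case '[':
			stack = append(stack, ']')
		case '{':
			stack = append(stack, '}')
		case ')', ']', '}':
			if len(stack) == 0 {
				return fmt.Errorf("Imbalanced delimiters, was not expecting %c", r)
			}
			if expect := stack[len(stack)-1]; expect != r {
				return fmt.Errorf("Imbalanced delimiters, expected %c, got %c", expect, r)
			}
			stack = stack[:len(stack)-1]
		}
	}
	// The sketch also rejects unclosed delimiters at end of input; the
	// committed lexer's handling of that case is not shown in this diff.
	if len(stack) > 0 {
		return fmt.Errorf("Imbalanced delimiters, unclosed delimiter expecting %c", stack[len(stack)-1])
	}
	return nil
}

func main() {
	fmt.Println(checkBalanced("([{}]())")) // <nil>
	fmt.Println(checkBalanced("([{])"))    // Imbalanced delimiters, expected }, got ]
}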
1 change: 1 addition & 0 deletions neo/environment.go
@@ -101,6 +101,7 @@ func (e *Environment) lex(source, name, filename string) *lexer {
leftDelim: cfg.BlockStartString,
rightDelim: cfg.BlockEndString,
items: make(chan item),
delimStack: make([]rune, 0, 10),
}
go l.run()
return l
61 changes: 57 additions & 4 deletions neo/lex.go
@@ -126,6 +126,7 @@ type lexer struct {
width Pos // width of last rune read from input
lastPos Pos // position of most recent item returned by nextItem
items chan item // channel of scanned items
delimStack []rune
// we will need a more sophisticated delim stack to parse jigo
//parenDepth int // nesting depth of ( ) exprs
}
@@ -151,6 +152,32 @@ func (l *lexer) peek() rune {
return r
}

// expectDelim maintains the delim stack: if r matches the expected
// closing delimiter, pop the stack and return true; otherwise report
// an error and return false.
func (l *lexer) expectDelim(r rune) bool {
if len(l.delimStack) == 0 {
l.errorf("Imbalanced delimiters, was not expecting %c", r)
return false
}
expect := l.delimStack[len(l.delimStack)-1]
if expect != r {
l.errorf("Imbalanced delimiters, expected %c, got %c", expect, r)
return false
}

l.delimStack = l.delimStack[:len(l.delimStack)-1]
return true
}

// shouldExpectDelim reports whether r is the next closing delimiter we expect.
func (l *lexer) shouldExpectDelim(r rune) bool {
if len(l.delimStack) == 0 {
return false
}
expect := l.delimStack[len(l.delimStack)-1]
return expect == r
}

// backup steps back one rune. Can only be called once per call of next.
func (l *lexer) backup() {
l.pos -= l.width
@@ -314,7 +341,10 @@ func lexInsideBlock(l *lexer) stateFn {
if l.pos == Pos(len(l.input)) {
return nil
}
if strings.HasPrefix(l.input[l.pos:], l.rightDelim) {
// If this is the rightDelim but the next rune is an expected closing
// delimiter, skip treating it as rightDelim. This allows, e.g., '}}'
// to appear as part of a literal inside a var block.
if strings.HasPrefix(l.input[l.pos:], l.rightDelim) && !l.shouldExpectDelim(l.peek()) {
l.pos += Pos(len(l.rightDelim))
l.emitRight()
return lexText
@@ -330,10 +360,18 @@ }
}

switch r {
case ',':
l.emit(tokenComma)
case '|':
l.emit(tokenPipe)
case '+':
l.emit(tokenAdd)
case '-':
l.emit(tokenSub)
case '~':
l.emit(tokenTilde)
case ':':
l.emit(tokenColon)
case '/':
if l.accept("/") {
l.emit(tokenFloordiv)
@@ -358,21 +396,36 @@
} else {
l.emit(tokenMul)
}
// TODO: balancing
case '=':
if l.accept("=") {
l.emit(tokenEqEq)
} else {
l.emit(tokenEq)
}
case '(':
l.emit(tokenLparen)
l.delimStack = append(l.delimStack, ')')
case '{':
l.emit(tokenLbrace)
l.delimStack = append(l.delimStack, '}')
case '[':
l.emit(tokenLbracket)
l.delimStack = append(l.delimStack, ']')
case ')':
if !l.expectDelim(r) {
return nil
}
l.emit(tokenRparen)
case '}':
if !l.expectDelim(r) {
return nil
}
l.emit(tokenRbrace)
case ']':
if !l.expectDelim(r) {
return nil
}
l.emit(tokenRbracket)
case '-':
l.emit(tokenSub)
}
}
}
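The peek-based guard added to lexInsideBlock is what resolves the clash between a literal's closing braces and the `}}` variable-end delimiter. A minimal sketch of that decision in isolation (assumed names; the real lexer threads this through its state functions and delim stack):

import (
	"strings"
	"unicode/utf8"
)

// atRightDelim reports whether the input at pos should be treated as the
// block-closing delimiter. It should not be when the next rune is itself
// an expected closer on the delim stack, e.g. the '}' of a map literal.
func atRightDelim(input string, pos int, rightDelim string, delimStack []rune) bool {
	if !strings.HasPrefix(input[pos:], rightDelim) {
		return false
	}
	next, _ := utf8.DecodeRuneInString(input[pos:])
	if len(delimStack) > 0 && delimStack[len(delimStack)-1] == next {
		return false // this '}' closes an open literal, not the block
	}
	return true
}

For `{{ ({a:b, {a:b}}) }}` (the last test case below), the stack at the inner `}}` is [')', '}', '}']; peek() returns '}', which matches the top of the stack, so the lexer consumes both braces as tokenRbrace, then ')' as tokenRparen, and only the final `}}`, reached with an empty stack, ends the block.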
144 changes: 100 additions & 44 deletions neo/lex_test.go
@@ -1,6 +1,9 @@
package jigo

import "testing"
import (
"fmt"
"testing"
)

type tokenTest struct {
typ itemType
@@ -15,8 +18,42 @@ var (
ttVariableBegin = tokenTest{tokenVariableBegin, "{{"}
ttVariableEnd = tokenTest{tokenVariableEnd, "}}"}
ttEOF = tokenTest{tokenEOF, ""}
ttSub = tokenTest{tokenSub, "-"}
ttAdd = tokenTest{tokenAdd, "+"}
ttDiv = tokenTest{tokenDiv, "/"}
ttComma = tokenTest{tokenComma, ","}
ttPipe = tokenTest{tokenPipe, "|"}
ttLparen = tokenTest{tokenLparen, "("}
ttRparen = tokenTest{tokenRparen, ")"}
ttLbrace = tokenTest{tokenLbrace, "{"}
ttRbrace = tokenTest{tokenRbrace, "}"}
ttLbracket = tokenTest{tokenLbracket, "["}
ttRbracket = tokenTest{tokenRbracket, "]"}
ttColon = tokenTest{tokenColon, ":"}
ttMul = tokenTest{tokenMul, "*"}
ttPow = tokenTest{tokenPow, "**"}
ttFloordiv = tokenTest{tokenFloordiv, "//"}
ttGt = tokenTest{tokenGt, ">"}
ttLt = tokenTest{tokenLt, "<"}
ttGteq = tokenTest{tokenGteq, ">="}
ttLteq = tokenTest{tokenLteq, "<="}
ttEq = tokenTest{tokenEq, "="}
ttEqEq = tokenTest{tokenEqEq, "=="}
sp = tokenTest{tokenWhitespace, " "}
)

func (t tokenTest) String() string {
return `"` + t.val + `"`
}

func tn(name string) tokenTest {
return tokenTest{tokenName, name}
}

func tt(name string) tokenTest {
return tokenTest{tokenText, name}
}

func tokenize(l *lexer) []item {
items := make([]item, 0, 50)
for t := range l.items {
@@ -34,14 +71,19 @@ func (lt *lextest) Test(input string, tests []tokenTest) {
l := e.lex(input, "test", "test.jigo")
tokens := tokenize(l)
if len(tokens) != len(tests) {
t.Fatalf("Expected %d tokens, got %d\n", len(tests), len(tokens))
t.Errorf("Expected %d tokens, got %d\n", len(tests), len(tokens))
}
for i, tok := range tokens {
if i >= len(tests) {
return
}
test := tests[i]
if test.typ != tok.typ {
fmt.Printf("tokens: %v\ntests: %v\n", tokens, tests)
t.Errorf("Expecting %d token type to be %d, got %d\n", i, test.typ, tok.typ)
}
if test.val != tok.val {
if tok.typ != tokenWhitespace && test.val != tok.val {
fmt.Printf("tokens: %v\ntests: %v\n", tokens, tests)
t.Errorf("Expecting %d token val to be `%s`, got `%s`\n", i, test.val, tok.val)
}
}
@@ -53,80 +95,94 @@ func TestLexer(t *testing.T) {
// Testing simple text with no jigo syntax
tester.Test(
`Hello, world`,
[]tokenTest{{tokenText, `Hello, world`}, ttEOF},
[]tokenTest{tt(`Hello, world`), ttEOF},
)

// Testing simple text with single jigo comment
tester.Test(
`{# comment #}`,
[]tokenTest{ttCommentBegin, tt(" comment "), ttCommentEnd, ttEOF},
)

tester.Test(
`Hello, {# comment #}World`,
[]tokenTest{tt("Hello, "), ttCommentBegin, tt(" comment "), ttCommentEnd, tt("World"), ttEOF},
)

tester.Test(
`{{ foo }}`,
[]tokenTest{ttVariableBegin, sp, tn("foo"), sp, ttVariableEnd, ttEOF},
)

tester.Test(
`{{ (a - b) + c }}`,
[]tokenTest{
ttCommentBegin,
{tokenText, " comment "},
ttCommentEnd,
ttEOF,
ttVariableBegin, sp, ttLparen, tn("a"), sp, ttSub, sp, tn("b"), ttRparen, sp,
ttAdd, sp, tn("c"), sp, ttVariableEnd, ttEOF,
},
)

tester.Test(
`Hello, {# comment #}World`,
`Hello. {% if true %}World{% else %}Nobody{% endif %}`,
[]tokenTest{
{tokenText, "Hello, "},
ttCommentBegin,
{tokenText, " comment "},
ttCommentEnd,
{tokenText, "World"},
ttEOF,
tt("Hello. "), ttBlockBegin, sp, tn("if"), sp,
{tokenBool, "true"}, sp, ttBlockEnd, tt("World"), ttBlockBegin, sp, tn("else"), sp,
ttBlockEnd, tt("Nobody"), ttBlockBegin, sp, tn("endif"), sp, ttBlockEnd, ttEOF,
},
)

sp := tokenTest{tokenWhitespace, " "}
tester.Test(
`<html>{# ignore {% tags %} in comments ##}</html>`,
[]tokenTest{
tt("<html>"), ttCommentBegin, tt(" ignore {% tags %} in comments #"),
ttCommentEnd, tt("</html>"), ttEOF,
},
)

tester.Test(
`{{ foo }}`,
`{# comment #}{% if foo -%} bar {%- elif baz %} bing{%endif %}`,
[]tokenTest{
ttVariableBegin, sp,
{tokenName, "foo"}, sp,
ttVariableEnd,
ttEOF,
ttCommentBegin, tt(" comment "), ttCommentEnd, ttBlockBegin, sp, tn("if"), sp,
tn("foo"), sp, ttSub, ttBlockEnd, tt(" bar "), ttBlockBegin, ttSub, sp, tn("elif"),
sp, tn("baz"), sp, ttBlockEnd, tt(" bing"), ttBlockBegin, tn("endif"), sp,
ttBlockEnd, ttEOF,
},
)

// Test a big mess of tokens, including single- and double-character tokens
tester.Test(
`{{ (a - b) + c }}`,
`{{ +--+ /+//,|*/**=>>=<=< == }}`,
[]tokenTest{
ttVariableBegin, sp, ttAdd, ttSub, ttSub, ttAdd, sp, ttDiv, ttAdd, ttFloordiv,
ttComma, ttPipe, ttMul, ttDiv, ttPow, ttEq, ttGt, ttGteq, ttLteq, ttLt, sp, ttEqEq,
sp, ttVariableEnd, ttEOF,
},
)

tester.Test(
`{{ ([{}]()) }}`,
[]tokenTest{
ttVariableBegin, sp,
{tokenLparen, "("},
{tokenName, "a"}, sp,
{tokenSub, "-"}, sp,
{tokenName, "b"},
{tokenRparen, ")"}, sp,
{tokenAdd, "+"}, sp,
{tokenName, "c"}, sp,
ttVariableEnd,
ttEOF,
ttLparen, ttLbracket, ttLbrace, ttRbrace, ttRbracket, ttLparen, ttRparen, ttRparen, sp,
ttVariableEnd, ttEOF,
},
)

tester.Test(
`Hello. {% if true %}World{% else %}Nobody{% endif %}`,
`{{ ([{]) }}`,
[]tokenTest{
{tokenText, "Hello. "},
ttBlockBegin, sp,
{tokenName, "if"}, sp,
{tokenBool, "true"}, sp, ttBlockEnd,
{tokenText, "World"}, ttBlockBegin, sp,
{tokenName, "else"}, sp, ttBlockEnd,
{tokenText, "Nobody"}, ttBlockBegin, sp,
{tokenName, "endif"}, sp, ttBlockEnd, ttEOF,
ttVariableBegin, sp, ttLparen, ttLbracket, ttLbrace,
{tokenError, "Imbalanced delimiters, expected }, got ]"},
},
)

// Test that delimiter balancing takes precedence over closing the block,
// i.e. that the `}}` closing the map doesn't close the var tag.
tester.Test(
`<html>{# ignore {% tags %} in comments ##}</html>`,
`{{ ({a:b, {a:b}}) }}`,
[]tokenTest{
{tokenText, "<html>"}, ttCommentBegin,
{tokenText, " ignore {% tags %} in comments #"}, ttCommentEnd,
{tokenText, "</html>"}, ttEOF,
ttVariableBegin, sp, ttLparen, ttLbrace, tn("a"), ttColon, tn("b"), ttComma, sp,
ttLbrace, tn("a"), ttColon, tn("b"), ttRbrace, ttRbrace, ttRparen, sp, ttVariableEnd, ttEOF,
},
)
}
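The tn and tt shortcuts from the commit message are what keep these expected-token tables compact: tn builds an expected tokenName, tt an expected tokenText. As a usage sketch, a hypothetical extra case (not part of this commit) would read:

// Hypothetical additional case using the tn/tt shortcuts.
tester.Test(
	`Hi {{ name }}`,
	[]tokenTest{tt("Hi "), ttVariableBegin, sp, tn("name"), sp, ttVariableEnd, ttEOF},
)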
