lexer.go
// Copyright 2017 Gerasimos Maropoulos. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package lexer

import (
	"github.com/go-siris/siris/core/router/macro/interpreter/token"
)
// Lexer scans the characters of a source and resolves their token types.
type Lexer struct {
	input   string
	pos     int  // current position in input (points to the current char)
	readPos int  // current reading position in input (after the current char)
	ch      byte // current char under examination
}
// New takes a source (a series of characters) and returns
// a lexer that is ready to read from the first character.
func New(src string) *Lexer {
	l := &Lexer{
		input: src,
	}
	// step to the first character so the lexer is ready to use
	l.readChar()
	return l
}
// readChar advances the cursor by one character.
// When the input is exhausted it sets l.ch to 0, which resolves to token.EOF.
func (l *Lexer) readChar() {
	if l.readPos >= len(l.input) {
		l.ch = 0
	} else {
		l.ch = l.input[l.readPos]
	}
	l.pos = l.readPos
	l.readPos++
}
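
// An illustrative trace of how the cursor advances for the input "{x}"
// (a sketch for the reader, derived from readChar above):
//
//	l := New("{x}") // ch = '{', pos = 0, readPos = 1
//	l.readChar()    // ch = 'x', pos = 1, readPos = 2
//	l.readChar()    // ch = '}', pos = 2, readPos = 3
//	l.readChar()    // ch = 0 (EOF), pos = 3, readPos = 4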
const (
	// Begin is the symbol from which the lexer starts scanning a macro expression.
	Begin = '{' // token.LBRACE
	// End is the symbol at which the lexer stops scanning.
	End = '}' // token.RBRACE
)
// resolveTokenType maps a single character to its token type.
func resolveTokenType(ch byte) token.Type {
	switch ch {
	case Begin:
		return token.LBRACE
	case End:
		return token.RBRACE
	// Let's keep it simple: no evaluation of logical operators,
	// we are not building a new programming language here.
	// ||
	// case '|':
	// 	if l.peekChar() == '|' {
	// 		ch := l.ch
	// 		l.readChar()
	// 		t = token.Token{Type: token.OR, Literal: string(ch) + string(l.ch)}
	// 	}
	case ':':
		return token.COLON
	case '(':
		return token.LPAREN
	case ')':
		return token.RPAREN
	case ',':
		return token.COMMA
	// literals
	case 0:
		return token.EOF
	default:
		return token.IDENT
	}
}
// NextToken returns the next token in the series of characters.
// It can be a single symbol, a keyword or a literal,
// and it returns an EOF token when the input is exhausted.
//
// It moves the cursor forward.
func (l *Lexer) NextToken() (t token.Token) {
	l.skipWhitespace()
	typ := resolveTokenType(l.ch)
	t.Type = typ

	switch typ {
	case token.EOF:
		t.Literal = ""
	case token.IDENT:
		if isLetter(l.ch) {
			// letters
			lit := l.readIdentifier()
			typ := token.LookupIdent(lit)
			t = l.newToken(typ, lit)
			return
		}
		if isDigit(l.ch) {
			// numbers
			lit := l.readNumber()
			t = l.newToken(token.INT, lit)
			return
		}
		t = l.newTokenRune(token.ILLEGAL, l.ch)
	default:
		t = l.newTokenRune(typ, l.ch)
	}
	l.readChar() // move the cursor to the next character
	return
}
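
// A minimal usage sketch (illustrative): tokenizing the macro source
// "{id:int}" yields LBRACE "{", IDENT "id", COLON ":", whatever type
// token.LookupIdent resolves for "int", RBRACE "}" and finally EOF:
//
//	l := New("{id:int}")
//	for t := l.NextToken(); t.Type != token.EOF; t = l.NextToken() {
//		_ = t // t.Type, t.Literal, t.Start and t.End are now available
//	}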
// NextDynamicToken doesn't cares about the grammar.
// It reads numbers or any unknown symbol,
// it's being used by parser to skip all characters
// between parameter function's arguments inside parenthesis,
// in order to allow custom regexp on the end-language too.
//
// It moves the cursor forward.
func (l *Lexer) NextDynamicToken() (t token.Token) {
// calculate anything, even spaces.
// numbers
lit := l.readNumber()
if lit != "" {
return l.newToken(token.INT, lit)
}
lit = l.readIdentifierFuncArgument()
return l.newToken(token.IDENT, lit)
}
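
// An illustrative sketch: with the cursor positioned just after the '(' of
// an argument list such as "(^[a-z]+$)", NextDynamicToken returns a single
// IDENT token with the literal "^[a-z]+$", stopping at the ')'.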
// readIdentifierFuncArgument reads any token that would otherwise be illegal
// while inside parentheses; this is what allows a custom regexp to be set
// inside a func.
func (l *Lexer) readIdentifierFuncArgument() string {
	pos := l.pos
	// stop at RPAREN or EOF; stopping at EOF guards against reading past
	// the end of the input when the closing ')' is missing.
	for typ := resolveTokenType(l.ch); typ != token.RPAREN && typ != token.EOF; typ = resolveTokenType(l.ch) {
		l.readChar()
	}
	return l.input[pos:l.pos]
}
// PeekNextTokenType returns only the token type
// of the next character; it does not move the cursor forward.
// It's used by the parser to recognise empty functions, i.e. `even()`,
// as valid functions with zero input arguments.
func (l *Lexer) PeekNextTokenType() token.Type {
	if len(l.input)-1 > l.pos {
		ch := l.input[l.pos]
		return resolveTokenType(ch)
	}
	return resolveTokenType(0) // EOF
}
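
// A minimal sketch: while scanning "{id:even()}", right after NextToken has
// returned the LPAREN token, PeekNextTokenType reports token.RPAREN without
// consuming it, which is how an empty argument list is detected.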
// newToken builds a token of the given type and literal,
// stamping it with its start and end positions in the input.
func (l *Lexer) newToken(tokenType token.Type, lit string) token.Token {
	t := token.Token{
		Type:    tokenType,
		Literal: lit,
		Start:   l.pos,
		End:     l.pos,
	}
	// Remember: for a multi-char literal l.pos is already past its last
	// character, and we want Start and End to be inclusive so that the user
	// can locate the token by simply marking the expression.
	if l.pos > 1 && len(lit) > 1 {
		t.End = l.pos - 1
		t.Start = t.End - len(lit) + 1
	}
	return t
}

// newTokenRune builds a single-character token out of the given type and character.
func (l *Lexer) newTokenRune(tokenType token.Type, ch byte) token.Token {
	return l.newToken(tokenType, string(ch))
}
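
// An illustrative example of the position stamping: for the input "{name}",
// the IDENT token with literal "name" gets Start = 1 and End = 4, the
// inclusive indexes of 'n' and 'e' in the input.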
// skipWhitespace moves the cursor past any spaces, tabs and line breaks.
func (l *Lexer) skipWhitespace() {
	for l.ch == ' ' || l.ch == '\t' || l.ch == '\n' || l.ch == '\r' {
		l.readChar()
	}
}

// readIdentifier consumes and returns a contiguous run of letters.
func (l *Lexer) readIdentifier() string {
	pos := l.pos
	for isLetter(l.ch) {
		l.readChar()
	}
	return l.input[pos:l.pos]
}

// isLetter reports whether ch is an ASCII letter or an underscore.
func isLetter(ch byte) bool {
	return 'a' <= ch && ch <= 'z' || 'A' <= ch && ch <= 'Z' || ch == '_'
}

// readNumber consumes and returns a contiguous run of digits.
func (l *Lexer) readNumber() string {
	pos := l.pos
	for isDigit(l.ch) {
		l.readChar()
	}
	return l.input[pos:l.pos]
}

// isDigit reports whether ch is an ASCII digit.
func isDigit(ch byte) bool {
	return '0' <= ch && ch <= '9'
}