Skip to content
Permalink
Branch: master
Find file Copy path
Find file Copy path
Fetching contributors…
Cannot retrieve contributors at this time
418 lines (346 sloc) 10.1 KB
// Copyright (c) 2012-2017 The ANTLR Project. All rights reserved.
// Use of this file is governed by the BSD 3-clause license that
// can be found in the LICENSE.txt file in the project root.
package antlr
import (
"fmt"
"strconv"
)
// A lexer is recognizer that draws input symbols from a character stream.
// lexer grammars result in a subclass of this object. A Lexer object
// uses simplified Match() and error recovery mechanisms in the interest
// of speed.
///
type Lexer interface {
TokenSource
Recognizer
Emit() Token
SetChannel(int)
PushMode(int)
PopMode() int
SetType(int)
SetMode(int)
}
type BaseLexer struct {
*BaseRecognizer
Interpreter ILexerATNSimulator
TokenStartCharIndex int
TokenStartLine int
TokenStartColumn int
ActionType int
Virt Lexer // The most derived lexer implementation. Allows virtual method calls.
input CharStream
factory TokenFactory
tokenFactorySourcePair *TokenSourceCharStreamPair
token Token
hitEOF bool
channel int
thetype int
modeStack IntStack
mode int
text string
}
func NewBaseLexer(input CharStream) *BaseLexer {
lexer := new(BaseLexer)
lexer.BaseRecognizer = NewBaseRecognizer()
lexer.input = input
lexer.factory = CommonTokenFactoryDEFAULT
lexer.tokenFactorySourcePair = &TokenSourceCharStreamPair{lexer, input}
lexer.Virt = lexer
lexer.Interpreter = nil // child classes must populate it
// The goal of all lexer rules/methods is to create a token object.
// l is an instance variable as multiple rules may collaborate to
// create a single token. NextToken will return l object after
// Matching lexer rule(s). If you subclass to allow multiple token
// emissions, then set l to the last token to be Matched or
// something nonnil so that the auto token emit mechanism will not
// emit another token.
lexer.token = nil
// What character index in the stream did the current token start at?
// Needed, for example, to get the text for current token. Set at
// the start of NextToken.
lexer.TokenStartCharIndex = -1
// The line on which the first character of the token resides///
lexer.TokenStartLine = -1
// The character position of first character within the line///
lexer.TokenStartColumn = -1
// Once we see EOF on char stream, next token will be EOF.
// If you have DONE : EOF then you see DONE EOF.
lexer.hitEOF = false
// The channel number for the current token///
lexer.channel = TokenDefaultChannel
// The token type for the current token///
lexer.thetype = TokenInvalidType
lexer.modeStack = make([]int, 0)
lexer.mode = LexerDefaultMode
// You can set the text for the current token to override what is in
// the input char buffer. Use setText() or can set l instance var.
// /
lexer.text = ""
return lexer
}
const (
LexerDefaultMode = 0
LexerMore = -2
LexerSkip = -3
)
const (
LexerDefaultTokenChannel = TokenDefaultChannel
LexerHidden = TokenHiddenChannel
LexerMinCharValue = 0x0000
LexerMaxCharValue = 0x10FFFF
)
func (b *BaseLexer) reset() {
// wack Lexer state variables
if b.input != nil {
b.input.Seek(0) // rewind the input
}
b.token = nil
b.thetype = TokenInvalidType
b.channel = TokenDefaultChannel
b.TokenStartCharIndex = -1
b.TokenStartColumn = -1
b.TokenStartLine = -1
b.text = ""
b.hitEOF = false
b.mode = LexerDefaultMode
b.modeStack = make([]int, 0)
b.Interpreter.reset()
}
func (b *BaseLexer) GetInterpreter() ILexerATNSimulator {
return b.Interpreter
}
func (b *BaseLexer) GetInputStream() CharStream {
return b.input
}
func (b *BaseLexer) GetSourceName() string {
return b.GrammarFileName
}
func (b *BaseLexer) SetChannel(v int) {
b.channel = v
}
func (b *BaseLexer) GetTokenFactory() TokenFactory {
return b.factory
}
func (b *BaseLexer) setTokenFactory(f TokenFactory) {
b.factory = f
}
func (b *BaseLexer) safeMatch() (ret int) {
defer func() {
if e := recover(); e != nil {
if re, ok := e.(RecognitionException); ok {
b.notifyListeners(re) // Report error
b.Recover(re)
ret = LexerSkip // default
}
}
}()
return b.Interpreter.Match(b.input, b.mode)
}
// Return a token from l source i.e., Match a token on the char stream.
func (b *BaseLexer) NextToken() Token {
if b.input == nil {
panic("NextToken requires a non-nil input stream.")
}
tokenStartMarker := b.input.Mark()
// previously in finally block
defer func() {
// make sure we release marker after Match or
// unbuffered char stream will keep buffering
b.input.Release(tokenStartMarker)
}()
for {
if b.hitEOF {
b.EmitEOF()
return b.token
}
b.token = nil
b.channel = TokenDefaultChannel
b.TokenStartCharIndex = b.input.Index()
b.TokenStartColumn = b.Interpreter.GetCharPositionInLine()
b.TokenStartLine = b.Interpreter.GetLine()
b.text = ""
continueOuter := false
for {
b.thetype = TokenInvalidType
ttype := LexerSkip
ttype = b.safeMatch()
if b.input.LA(1) == TokenEOF {
b.hitEOF = true
}
if b.thetype == TokenInvalidType {
b.thetype = ttype
}
if b.thetype == LexerSkip {
continueOuter = true
break
}
if b.thetype != LexerMore {
break
}
}
if continueOuter {
continue
}
if b.token == nil {
b.Virt.Emit()
}
return b.token
}
return nil
}
// Instruct the lexer to Skip creating a token for current lexer rule
// and look for another token. NextToken() knows to keep looking when
// a lexer rule finishes with token set to SKIPTOKEN. Recall that
// if token==nil at end of any token rule, it creates one for you
// and emits it.
// /
func (b *BaseLexer) Skip() {
b.thetype = LexerSkip
}
func (b *BaseLexer) More() {
b.thetype = LexerMore
}
func (b *BaseLexer) SetMode(m int) {
b.mode = m
}
func (b *BaseLexer) PushMode(m int) {
if LexerATNSimulatorDebug {
fmt.Println("pushMode " + strconv.Itoa(m))
}
b.modeStack.Push(b.mode)
b.mode = m
}
func (b *BaseLexer) PopMode() int {
if len(b.modeStack) == 0 {
panic("Empty Stack")
}
if LexerATNSimulatorDebug {
fmt.Println("popMode back to " + fmt.Sprint(b.modeStack[0:len(b.modeStack)-1]))
}
i, _ := b.modeStack.Pop()
b.mode = i
return b.mode
}
func (b *BaseLexer) inputStream() CharStream {
return b.input
}
func (b *BaseLexer) setInputStream(input CharStream) {
b.input = nil
b.tokenFactorySourcePair = &TokenSourceCharStreamPair{b, b.input}
b.reset()
b.input = input
b.tokenFactorySourcePair = &TokenSourceCharStreamPair{b, b.input}
}
func (b *BaseLexer) GetTokenSourceCharStreamPair() *TokenSourceCharStreamPair {
return b.tokenFactorySourcePair
}
// By default does not support multiple emits per NextToken invocation
// for efficiency reasons. Subclass and override l method, NextToken,
// and GetToken (to push tokens into a list and pull from that list
// rather than a single variable as l implementation does).
// /
func (b *BaseLexer) EmitToken(token Token) {
b.token = token
}
// The standard method called to automatically emit a token at the
// outermost lexical rule. The token object should point into the
// char buffer start..stop. If there is a text override in 'text',
// use that to set the token's text. Override l method to emit
// custom Token objects or provide a Newfactory.
// /
func (b *BaseLexer) Emit() Token {
t := b.factory.Create(b.tokenFactorySourcePair, b.thetype, b.text, b.channel, b.TokenStartCharIndex, b.GetCharIndex()-1, b.TokenStartLine, b.TokenStartColumn)
b.EmitToken(t)
return t
}
func (b *BaseLexer) EmitEOF() Token {
cpos := b.GetCharPositionInLine()
lpos := b.GetLine()
eof := b.factory.Create(b.tokenFactorySourcePair, TokenEOF, "", TokenDefaultChannel, b.input.Index(), b.input.Index()-1, lpos, cpos)
b.EmitToken(eof)
return eof
}
func (b *BaseLexer) GetCharPositionInLine() int {
return b.Interpreter.GetCharPositionInLine()
}
func (b *BaseLexer) GetLine() int {
return b.Interpreter.GetLine()
}
func (b *BaseLexer) GetType() int {
return b.thetype
}
func (b *BaseLexer) SetType(t int) {
b.thetype = t
}
// What is the index of the current character of lookahead?///
func (b *BaseLexer) GetCharIndex() int {
return b.input.Index()
}
// Return the text Matched so far for the current token or any text override.
//Set the complete text of l token it wipes any previous changes to the text.
func (b *BaseLexer) GetText() string {
if b.text != "" {
return b.text
}
return b.Interpreter.GetText(b.input)
}
func (b *BaseLexer) SetText(text string) {
b.text = text
}
func (b *BaseLexer) GetATN() *ATN {
return b.Interpreter.ATN()
}
// Return a list of all Token objects in input char stream.
// Forces load of all tokens. Does not include EOF token.
// /
func (b *BaseLexer) GetAllTokens() []Token {
vl := b.Virt
tokens := make([]Token, 0)
t := vl.NextToken()
for t.GetTokenType() != TokenEOF {
tokens = append(tokens, t)
t = vl.NextToken()
}
return tokens
}
func (b *BaseLexer) notifyListeners(e RecognitionException) {
start := b.TokenStartCharIndex
stop := b.input.Index()
text := b.input.GetTextFromInterval(NewInterval(start, stop))
msg := "token recognition error at: '" + text + "'"
listener := b.GetErrorListenerDispatch()
listener.SyntaxError(b, nil, b.TokenStartLine, b.TokenStartColumn, msg, e)
}
func (b *BaseLexer) getErrorDisplayForChar(c rune) string {
if c == TokenEOF {
return "<EOF>"
} else if c == '\n' {
return "\\n"
} else if c == '\t' {
return "\\t"
} else if c == '\r' {
return "\\r"
} else {
return string(c)
}
}
func (b *BaseLexer) getCharErrorDisplay(c rune) string {
return "'" + b.getErrorDisplayForChar(c) + "'"
}
// Lexers can normally Match any char in it's vocabulary after Matching
// a token, so do the easy thing and just kill a character and hope
// it all works out. You can instead use the rule invocation stack
// to do sophisticated error recovery if you are in a fragment rule.
// /
func (b *BaseLexer) Recover(re RecognitionException) {
if b.input.LA(1) != TokenEOF {
if _, ok := re.(*LexerNoViableAltException); ok {
// Skip a char and try again
b.Interpreter.Consume(b.input)
} else {
// TODO: Do we lose character or line position information?
b.input.Consume()
}
}
}
You can’t perform that action at this time.