Skip to content
Permalink
Browse files

feat(compiler): record end of expression Token (#33549)

In the past, only the starting index of an expression Token was
recorded, so a parser could demarcate the span of a token only by the
start locations of two adjacent tokens. This can lead to trailing
whitespace being included in the token span:

```html
{{ token1   + token2 }}
   ^^^^^^^^^             recorded span of `token1`
```

It's also not enough for a parser to determine the end of a token by
adding the length of the token value to the token's start location,
because lexed expression values may not exactly reflect the source code.
For example, `"d\\"e"` is lexed as a string token whose value is `d"e`.

Instead, this commit adds an `end` field to expression tokens. `end`
is one past the last index of the token's source code. This enables a
parser to determine the span of a token just by looking at that token.

This is a breaking change because the constructor interface of `Token`
has changed.

Part of #33477.

PR Close #33549
  • Loading branch information
ayazhafiz authored and alxhub committed Nov 2, 2019
1 parent 4414fce commit 8a25cd4e968825bfd3d1770465fc6ce2cfde1e8f
Showing with 120 additions and 115 deletions.
  1. +26 −25 packages/compiler/src/expression_parser/lexer.ts
  2. +94 −90 packages/compiler/test/expression_parser/lexer_spec.ts
@@ -35,7 +35,7 @@ export class Lexer {

export class Token {
constructor(
public index: number, public type: TokenType, public numValue: number,
public index: number, public end: number, public type: TokenType, public numValue: number,
public strValue: string) {}

isCharacter(code: number): boolean {
@@ -91,35 +91,35 @@ export class Token {
}
}

function newCharacterToken(index: number, code: number): Token {
return new Token(index, TokenType.Character, code, String.fromCharCode(code));
function newCharacterToken(index: number, end: number, code: number): Token {
return new Token(index, end, TokenType.Character, code, String.fromCharCode(code));
}

function newIdentifierToken(index: number, text: string): Token {
return new Token(index, TokenType.Identifier, 0, text);
function newIdentifierToken(index: number, end: number, text: string): Token {
return new Token(index, end, TokenType.Identifier, 0, text);
}

function newKeywordToken(index: number, text: string): Token {
return new Token(index, TokenType.Keyword, 0, text);
function newKeywordToken(index: number, end: number, text: string): Token {
return new Token(index, end, TokenType.Keyword, 0, text);
}

function newOperatorToken(index: number, text: string): Token {
return new Token(index, TokenType.Operator, 0, text);
function newOperatorToken(index: number, end: number, text: string): Token {
return new Token(index, end, TokenType.Operator, 0, text);
}

function newStringToken(index: number, text: string): Token {
return new Token(index, TokenType.String, 0, text);
function newStringToken(index: number, end: number, text: string): Token {
return new Token(index, end, TokenType.String, 0, text);
}

function newNumberToken(index: number, n: number): Token {
return new Token(index, TokenType.Number, n, '');
function newNumberToken(index: number, end: number, n: number): Token {
return new Token(index, end, TokenType.Number, n, '');
}

function newErrorToken(index: number, message: string): Token {
return new Token(index, TokenType.Error, 0, message);
function newErrorToken(index: number, end: number, message: string): Token {
return new Token(index, end, TokenType.Error, 0, message);
}

export const EOF: Token = new Token(-1, TokenType.Character, 0, '');
export const EOF: Token = new Token(-1, -1, TokenType.Character, 0, '');

class _Scanner {
length: number;
@@ -165,7 +165,7 @@ class _Scanner {
case chars.$PERIOD:
this.advance();
return chars.isDigit(this.peek) ? this.scanNumber(start) :
newCharacterToken(start, chars.$PERIOD);
newCharacterToken(start, this.index, chars.$PERIOD);
case chars.$LPAREN:
case chars.$RPAREN:
case chars.$LBRACE:
@@ -211,13 +211,13 @@ class _Scanner {

scanCharacter(start: number, code: number): Token {
this.advance();
return newCharacterToken(start, code);
return newCharacterToken(start, this.index, code);
}


scanOperator(start: number, str: string): Token {
this.advance();
return newOperatorToken(start, str);
return newOperatorToken(start, this.index, str);
}

/**
@@ -243,16 +243,16 @@ class _Scanner {
this.advance();
str += three;
}
return newOperatorToken(start, str);
return newOperatorToken(start, this.index, str);
}

scanIdentifier(): Token {
const start: number = this.index;
this.advance();
while (isIdentifierPart(this.peek)) this.advance();
const str: string = this.input.substring(start, this.index);
return KEYWORDS.indexOf(str) > -1 ? newKeywordToken(start, str) :
newIdentifierToken(start, str);
return KEYWORDS.indexOf(str) > -1 ? newKeywordToken(start, this.index, str) :
newIdentifierToken(start, this.index, str);
}

scanNumber(start: number): Token {
@@ -275,7 +275,7 @@ class _Scanner {
}
const str: string = this.input.substring(start, this.index);
const value: number = simple ? parseIntAutoRadix(str) : parseFloat(str);
return newNumberToken(start, value);
return newNumberToken(start, this.index, value);
}

scanString(): Token {
@@ -321,13 +321,14 @@ class _Scanner {
const last: string = input.substring(marker, this.index);
this.advance(); // Skip terminating quote.

return newStringToken(start, buffer + last);
return newStringToken(start, this.index, buffer + last);
}

error(message: string, offset: number): Token {
const position: number = this.index + offset;
return newErrorToken(
position, `Lexer Error: ${message} at column ${position} in expression [${this.input}]`);
position, this.index,
`Lexer Error: ${message} at column ${position} in expression [${this.input}]`);
}
}

0 comments on commit 8a25cd4

Please sign in to comment.
You can’t perform that action at this time.