Skip to content

Commit

Permalink
refactor(compiler): introduce block parsing in lexer (#50895)
Browse files Browse the repository at this point in the history
⚠️Disclaimer⚠️ this PR implements syntax that is still in an open RFC. It will be adjusted once the RFC is closed.

These changes extend the lexer to recognize the concepts of a block group (`{#foo paramA; paramB}{/foo}`) and a block (`{:foo paramA; paramB;}`) which will be useful later on for the control flow and defer proposals. Block groups can be used anywhere and require a closing tag, while blocks can only be used inside of a block group.

The idea is that in the next PRs the markup AST will be expanded to include more specialized nodes like `ConditionalBlock` or `DeferBlock`, which will then be turned into instructions.

PR Close #50895
  • Loading branch information
crisbeto authored and dylhunn committed Jul 11, 2023
1 parent 8758517 commit 29aaded
Show file tree
Hide file tree
Showing 3 changed files with 364 additions and 6 deletions.
113 changes: 109 additions & 4 deletions packages/compiler/src/ml_parser/lexer.ts
Expand Up @@ -89,6 +89,12 @@ export interface TokenizeOptions {
* If true, do not convert CRLF to LF.
*/
preserveLineEndings?: boolean;

// TODO(crisbeto): temporary option to limit access to the block syntax.
/**
* Whether the block syntax is enabled at the compiler level.
*/
tokenizeBlocks?: boolean;
}

export function tokenize(
Expand Down Expand Up @@ -136,8 +142,8 @@ class _Tokenizer {
private _expansionCaseStack: TokenType[] = [];
private _inInterpolation: boolean = false;
private readonly _preserveLineEndings: boolean;
private readonly _escapedString: boolean;
private readonly _i18nNormalizeLineEndingsInICUs: boolean;
private readonly _tokenizeBlocks: boolean;
tokens: Token[] = [];
errors: TokenError[] = [];
nonNormalizedIcuExpressions: Token[] = [];
Expand All @@ -159,8 +165,8 @@ class _Tokenizer {
this._cursor = options.escapedString ? new EscapedCharacterCursor(_file, range) :
new PlainCharacterCursor(_file, range);
this._preserveLineEndings = options.preserveLineEndings || false;
this._escapedString = options.escapedString || false;
this._i18nNormalizeLineEndingsInICUs = options.i18nNormalizeLineEndingsInICUs || false;
this._tokenizeBlocks = options.tokenizeBlocks || false;
try {
this._cursor.init();
} catch (e) {
Expand Down Expand Up @@ -197,6 +203,12 @@ class _Tokenizer {
} else {
this._consumeTagOpen(start);
}
} else if (this._tokenizeBlocks && this._attemptStr('{#')) {
this._consumeBlockGroupOpen(start);
} else if (this._tokenizeBlocks && this._attemptStr('{/')) {
this._consumeBlockGroupClose(start);
} else if (this._tokenizeBlocks && this._attemptStr('{:')) {
this._consumeBlock(start);
} else if (!(this._tokenizeIcu && this._tokenizeExpansionForm())) {
// In (possibly interpolated) text the end of the text is given by `isTextEnd()`, while
// the premature end of an interpolation is given by the start of a new HTML element.
Expand All @@ -212,6 +224,72 @@ class _Tokenizer {
this._endToken([]);
}

/**
 * Consumes the open tag of a block group, e.g. `{#foo param1; param2}`.
 * Emits a `BLOCK_GROUP_OPEN_START` token carrying the group name, one
 * `BLOCK_PARAMETER` token per parameter, and a `BLOCK_GROUP_OPEN_END`
 * token for the closing brace.
 * @param start Cursor pointing at the location where the `{#` began.
 */
private _consumeBlockGroupOpen(start: CharacterCursor) {
this._beginToken(TokenType.BLOCK_GROUP_OPEN_START, start);
// The group name runs until the first character that is not a valid name character.
const nameStart = this._cursor.clone();
this._attemptCharCodeUntilFn(charCode => !isBlockNameChar(charCode));
const groupName = this._cursor.getChars(nameStart);
this._endToken([groupName]);
this._consumeBlockParameters();
// After the parameters only the closing brace of the open tag remains.
this._beginToken(TokenType.BLOCK_GROUP_OPEN_END);
this._requireCharCode(chars.$RBRACE);
this._endToken([]);
}

/**
 * Consumes the closing tag of a block group, e.g. `{/foo}`, emitting a
 * single `BLOCK_GROUP_CLOSE` token that carries the group name.
 * @param start Cursor pointing at the location where the `{/` began.
 */
private _consumeBlockGroupClose(start: CharacterCursor) {
this._beginToken(TokenType.BLOCK_GROUP_CLOSE, start);
// Read the name up to the first non-name character, then require the
// closing brace before the token is finalized.
const nameStart = this._cursor.clone();
this._attemptCharCodeUntilFn(charCode => !isBlockNameChar(charCode));
const closedName = this._cursor.getChars(nameStart);
this._requireCharCode(chars.$RBRACE);
this._endToken([closedName]);
}

/**
 * Consumes a block open tag, e.g. `{:foo param1; param2}`. Emits a
 * `BLOCK_OPEN_START` token carrying the block name, one `BLOCK_PARAMETER`
 * token per parameter, and a `BLOCK_OPEN_END` token for the closing brace.
 * @param start Cursor pointing at the location where the `{:` began.
 */
private _consumeBlock(start: CharacterCursor) {
this._beginToken(TokenType.BLOCK_OPEN_START, start);
// Capture the block name, which extends until the first non-name character.
const nameStart = this._cursor.clone();
this._attemptCharCodeUntilFn(charCode => !isBlockNameChar(charCode));
const blockName = this._cursor.getChars(nameStart);
this._endToken([blockName]);
this._consumeBlockParameters();
// The parameter list is followed by the closing brace of the open tag.
this._beginToken(TokenType.BLOCK_OPEN_END);
this._requireCharCode(chars.$RBRACE);
this._endToken([]);
}

/**
 * Consumes the semicolon-delimited parameters of a block or block group
 * open tag, emitting one `BLOCK_PARAMETER` token per parameter. Stops when
 * the cursor reaches the closing `}` of the tag (or EOF); the brace itself
 * is left for the caller to consume.
 */
private _consumeBlockParameters() {
// Trim the whitespace until the first parameter.
this._attemptCharCodeUntilFn(isBlockParameterChar);

while (this._cursor.peek() !== chars.$RBRACE && this._cursor.peek() !== chars.$EOF) {
this._beginToken(TokenType.BLOCK_PARAMETER);
const start = this._cursor.clone();
// Tracks the quote character of the string currently being consumed, or
// null when the cursor is not inside a string.
let inQuote: number|null = null;

// Consume the parameter until the next semicolon or brace.
// Note that we skip over semicolons/braces inside of strings.
while ((this._cursor.peek() !== chars.$SEMICOLON && this._cursor.peek() !== chars.$RBRACE &&
this._cursor.peek() !== chars.$EOF) ||
inQuote !== null) {
const char = this._cursor.peek();

// Skip to the next character if it was escaped.
if (char === chars.$BACKSLASH) {
this._cursor.advance();
} else if (char === inQuote) {
// Matching closing quote — the string has ended.
inQuote = null;
} else if (inQuote === null && chars.isQuote(char)) {
// Entering a string; remember which quote character opened it so only
// the matching one can close it.
inQuote = char;
}

this._cursor.advance();
}

this._endToken([this._cursor.getChars(start)]);

// Skip to the next parameter.
this._attemptCharCodeUntilFn(isBlockParameterChar);
}
}

/**
* @returns whether an ICU token has been created
* @internal
Expand Down Expand Up @@ -578,7 +656,6 @@ class _Tokenizer {
}

private _consumeAttributeValue() {
let value: string;
if (this._cursor.peek() === chars.$SQ || this._cursor.peek() === chars.$DQ) {
const quoteChar = this._cursor.peek();
this._consumeQuote(quoteChar);
Expand Down Expand Up @@ -795,7 +872,7 @@ class _Tokenizer {
}

private _isTextEnd(): boolean {
if (this._isTagStart() || this._cursor.peek() === chars.$EOF) {
if (this._isTagStart() || this._isBlockStart() || this._cursor.peek() === chars.$EOF) {
return true;
}

Expand Down Expand Up @@ -833,6 +910,26 @@ class _Tokenizer {
return false;
}

/**
 * Returns whether the cursor is positioned at the start of a block group
 * open (`{#`), block group close (`{/`) or block (`{:`) marker that is
 * followed by a valid block name character. Always false when block
 * tokenization is disabled.
 */
private _isBlockStart(): boolean {
if (this._tokenizeBlocks && this._cursor.peek() === chars.$LBRACE) {
const tmp = this._cursor.clone();

// Check that the cursor is on a `{#`, `{/` or `{:`.
tmp.advance();
const next = tmp.peek();
// Note: `$HASH` (`#`) matches the `{#` prefix consumed by `_tokenize`/
// `_consumeBlockGroupOpen`; the previous `$BANG` (`!`) check could never
// match a block group open, so text would not end at `{#foo`.
if (next !== chars.$HASH && next !== chars.$SLASH && next !== chars.$COLON) {
return false;
}

// If it is, also verify that the next character is a valid block identifier.
tmp.advance();
if (isBlockNameChar(tmp.peek())) {
return true;
}
}
return false;
}

private _readUntil(char: number): string {
const start = this._cursor.clone();
this._attemptUntilChar(char);
Expand Down Expand Up @@ -900,6 +997,14 @@ function toUpperCaseCharCode(code: number): number {
return code >= chars.$a && code <= chars.$z ? code - chars.$a + chars.$A : code;
}

/** Whether a character code may appear in a block or block group name (letters, digits or `_`). */
function isBlockNameChar(code: number): boolean {
  return code === chars.$_ || chars.isDigit(code) || chars.isAsciiLetter(code);
}

/** Whether a character code may start/continue a block parameter (non-whitespace, not `;`). */
function isBlockParameterChar(code: number): boolean {
  return isNotWhitespace(code) && code !== chars.$SEMICOLON;
}

function mergeTextTokens(srcTokens: Token[]): Token[] {
const dstTokens: Token[] = [];
let lastDstToken: Token|undefined = undefined;
Expand Down
41 changes: 39 additions & 2 deletions packages/compiler/src/ml_parser/tokens.ts
Expand Up @@ -33,15 +33,22 @@ export const enum TokenType {
EXPANSION_CASE_EXP_START,
EXPANSION_CASE_EXP_END,
EXPANSION_FORM_END,
EOF
EOF,
BLOCK_GROUP_OPEN_START,
BLOCK_GROUP_OPEN_END,
BLOCK_GROUP_CLOSE,
BLOCK_PARAMETER,
BLOCK_OPEN_START,
BLOCK_OPEN_END,
}

/** Union of every token the markup lexer can produce, including the block tokens. */
export type Token = TagOpenStartToken|TagOpenEndToken|TagOpenEndVoidToken|TagCloseToken|
IncompleteTagOpenToken|TextToken|InterpolationToken|EncodedEntityToken|CommentStartToken|
CommentEndToken|CdataStartToken|CdataEndToken|AttributeNameToken|AttributeQuoteToken|
AttributeValueTextToken|AttributeValueInterpolationToken|DocTypeToken|ExpansionFormStartToken|
ExpansionCaseValueToken|ExpansionCaseExpressionStartToken|ExpansionCaseExpressionEndToken|
ExpansionFormEndToken|EndOfFileToken|BlockGroupOpenStartToken|BlockGroupOpenEndToken|
BlockGroupCloseToken|BlockParameterToken|BlockOpenStartToken|BlockOpenEndToken;

/** Tokens that can make up a run of (possibly interpolated) text. */
export type InterpolatedTextToken = TextToken|InterpolationToken|EncodedEntityToken;

Expand Down Expand Up @@ -170,3 +177,33 @@ export interface EndOfFileToken extends TokenBase {
type: TokenType.EOF;
parts: [];
}

/** Start of a block group open tag, e.g. the `{#foo` in `{#foo param}`. */
export interface BlockGroupOpenStartToken extends TokenBase {
type: TokenType.BLOCK_GROUP_OPEN_START;
// Single part holding the block group name (e.g. `foo`).
parts: [name: string];
}

/** Closing `}` of a block group open tag; carries no data. */
export interface BlockGroupOpenEndToken extends TokenBase {
type: TokenType.BLOCK_GROUP_OPEN_END;
parts: [];
}

/** Block group closing tag, e.g. `{/foo}`. */
export interface BlockGroupCloseToken extends TokenBase {
type: TokenType.BLOCK_GROUP_CLOSE;
// Single part holding the name of the group being closed (e.g. `foo`).
parts: [name: string];
}

/** One parameter of a block or block group open tag. */
export interface BlockParameterToken extends TokenBase {
type: TokenType.BLOCK_PARAMETER;
// Single part holding the raw parameter text (semicolon-delimited in the source).
parts: [expression: string];
}

/** Start of a block open tag, e.g. the `{:foo` in `{:foo param}`. */
export interface BlockOpenStartToken extends TokenBase {
type: TokenType.BLOCK_OPEN_START;
// Single part holding the block name (e.g. `foo`).
parts: [name: string];
}

/** Closing `}` of a block open tag; carries no data. */
export interface BlockOpenEndToken extends TokenBase {
type: TokenType.BLOCK_OPEN_END;
parts: [];
}

0 comments on commit 29aaded

Please sign in to comment.