feat(html_lexer): support special forms used by i18n { exp, plural, =0 {} }
vsavkin committed Apr 18, 2016
1 parent d99823e commit 7f29766
Showing 2 changed files with 202 additions and 11 deletions.
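The gist of the change: tokenizeHtml() gains an optional tokenizeExpansionForms flag (default false), and the lexer emits five new token types for ICU-style expansion forms such as { exp, plural, =0 {} }. A minimal usage sketch follows; the module path and the sample input are illustrative assumptions, not part of this commit, and the expected token stream mirrors the "should parse an expansion form" spec below.

import {tokenizeHtml, HtmlTokenType} from 'angular2/src/compiler/html_lexer';

// Opt in to expansion-form tokenization via the new third argument.
const result = tokenizeHtml('{messages.length, plural, =0 {none} =1 {one}}', 'someUrl', true);

// Humanize the tokens the same way the spec does: [type, ...parts].
const humanized = result.tokens.map(token => [token.type, ...token.parts]);

// Expected shape:
// EXPANSION_FORM_START, RAW_TEXT 'messages.length', RAW_TEXT 'plural',
// EXPANSION_CASE_VALUE '0', EXPANSION_CASE_EXP_START, TEXT 'none', EXPANSION_CASE_EXP_END,
// EXPANSION_CASE_VALUE '1', EXPANSION_CASE_EXP_START, TEXT 'one', EXPANSION_CASE_EXP_END,
// EXPANSION_FORM_END, EOF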
131 changes: 124 additions & 7 deletions modules/angular2/src/compiler/html_lexer.ts
@@ -25,6 +25,11 @@ export enum HtmlTokenType {
ATTR_NAME,
ATTR_VALUE,
DOC_TYPE,
EXPANSION_FORM_START,
EXPANSION_CASE_VALUE,
EXPANSION_CASE_EXP_START,
EXPANSION_CASE_EXP_END,
EXPANSION_FORM_END,
EOF
}

@@ -43,8 +48,10 @@ export class HtmlTokenizeResult {
constructor(public tokens: HtmlToken[], public errors: HtmlTokenError[]) {}
}

- export function tokenizeHtml(sourceContent: string, sourceUrl: string): HtmlTokenizeResult {
-   return new _HtmlTokenizer(new ParseSourceFile(sourceContent, sourceUrl)).tokenize();
export function tokenizeHtml(sourceContent: string, sourceUrl: string,
tokenizeExpansionForms: boolean = false): HtmlTokenizeResult {
return new _HtmlTokenizer(new ParseSourceFile(sourceContent, sourceUrl), tokenizeExpansionForms)
.tokenize();
}

const $EOF = 0;
@@ -75,6 +82,9 @@ const $GT = 62;
const $QUESTION = 63;
const $LBRACKET = 91;
const $RBRACKET = 93;
const $LBRACE = 123;
const $RBRACE = 125;
const $COMMA = 44;
const $A = 65;
const $F = 70;
const $X = 88;
@@ -108,16 +118,20 @@ class _HtmlTokenizer {
private length: number;
// Note: this is always lowercase!
private peek: number = -1;
private nextPeek: number = -1;
private index: number = -1;
private line: number = 0;
private column: number = -1;
private currentTokenStart: ParseLocation;
private currentTokenType: HtmlTokenType;

private inExpansionCase: boolean = false;
private inExpansionForm: boolean = false;

tokens: HtmlToken[] = [];
errors: HtmlTokenError[] = [];

- constructor(private file: ParseSourceFile) {
constructor(private file: ParseSourceFile, private tokenizeExpansionForms: boolean) {
this.input = file.content;
this.length = file.content.length;
this._advance();
@@ -149,6 +163,18 @@ class _HtmlTokenizer {
} else {
this._consumeTagOpen(start);
}
} else if (isSpecialFormStart(this.peek, this.nextPeek) && this.tokenizeExpansionForms) {
this._consumeExpansionFormStart();

} else if (this.peek === $EQ && this.tokenizeExpansionForms) {
this._consumeExpansionCaseStart();

} else if (this.peek === $RBRACE && this.inExpansionCase && this.tokenizeExpansionForms) {
this._consumeExpansionCaseEnd();

} else if (this.peek === $RBRACE && !this.inExpansionCase && this.tokenizeExpansionForms) {
this._consumeExpansionFormEnd();

} else {
this._consumeText();
}
@@ -218,6 +244,8 @@
}
this.index++;
this.peek = this.index >= this.length ? $EOF : StringWrapper.charCodeAt(this.input, this.index);
this.nextPeek =
this.index + 1 >= this.length ? $EOF : StringWrapper.charCodeAt(this.input, this.index + 1);
}

private _attemptCharCode(charCode: number): boolean {
@@ -506,20 +534,109 @@
this._endToken(prefixAndName);
}

private _consumeExpansionFormStart() {
this._beginToken(HtmlTokenType.EXPANSION_FORM_START, this._getLocation());
this._requireCharCode($LBRACE);
this._endToken([]);

this._beginToken(HtmlTokenType.RAW_TEXT, this._getLocation());
let condition = this._readUntil($COMMA);
this._endToken([condition], this._getLocation());
this._requireCharCode($COMMA);
this._attemptCharCodeUntilFn(isNotWhitespace);

this._beginToken(HtmlTokenType.RAW_TEXT, this._getLocation());
let type = this._readUntil($COMMA);
this._endToken([type], this._getLocation());
this._requireCharCode($COMMA);
this._attemptCharCodeUntilFn(isNotWhitespace);

this.inExpansionForm = true;
}

private _consumeExpansionCaseStart() {
this._requireCharCode($EQ);

this._beginToken(HtmlTokenType.EXPANSION_CASE_VALUE, this._getLocation());
let value = this._readUntil($LBRACE).trim();
this._endToken([value], this._getLocation());
this._attemptCharCodeUntilFn(isNotWhitespace);

this._beginToken(HtmlTokenType.EXPANSION_CASE_EXP_START, this._getLocation());
this._requireCharCode($LBRACE);
this._endToken([], this._getLocation());
this._attemptCharCodeUntilFn(isNotWhitespace);

this.inExpansionCase = true;
}

private _consumeExpansionCaseEnd() {
this._beginToken(HtmlTokenType.EXPANSION_CASE_EXP_END, this._getLocation());
this._requireCharCode($RBRACE);
this._endToken([], this._getLocation());
this._attemptCharCodeUntilFn(isNotWhitespace);

this.inExpansionCase = false;
}

private _consumeExpansionFormEnd() {
this._beginToken(HtmlTokenType.EXPANSION_FORM_END, this._getLocation());
this._requireCharCode($RBRACE);
this._endToken([]);

this.inExpansionForm = false;
}

private _consumeText() {
var start = this._getLocation();
this._beginToken(HtmlTokenType.TEXT, start);
- var parts = [this._readChar(true)];
- while (!isTextEnd(this.peek)) {

var parts = [];
let interpolation = false;

if (this.peek === $LBRACE && this.nextPeek === $LBRACE) {
parts.push(this._readChar(true));
parts.push(this._readChar(true));
interpolation = true;
} else {
parts.push(this._readChar(true));
}

while (!this.isTextEnd(interpolation)) {
if (this.peek === $LBRACE && this.nextPeek === $LBRACE) {
parts.push(this._readChar(true));
parts.push(this._readChar(true));
interpolation = true;
} else if (this.peek === $RBRACE && this.nextPeek === $RBRACE && interpolation) {
parts.push(this._readChar(true));
parts.push(this._readChar(true));
interpolation = false;
} else {
parts.push(this._readChar(true));
}
}
this._endToken([this._processCarriageReturns(parts.join(''))]);
}

private isTextEnd(interpolation: boolean): boolean {
if (this.peek === $LT || this.peek === $EOF) return true;
if (this.tokenizeExpansionForms) {
if (isSpecialFormStart(this.peek, this.nextPeek)) return true;
if (this.peek === $RBRACE && !interpolation && this.inExpansionForm) return true;
}
return false;
}

private _savePosition(): number[] {
return [this.peek, this.index, this.column, this.line, this.tokens.length];
}

private _readUntil(char: number): string {
let start = this.index;
this._attemptUntilChar(char);
return this.input.substring(start, this.index);
}

private _restorePosition(position: number[]): void {
this.peek = position[0];
this.index = position[1];
@@ -558,8 +675,8 @@ function isNamedEntityEnd(code: number): boolean {
return code == $SEMICOLON || code == $EOF || !isAsciiLetter(code);
}

- function isTextEnd(code: number): boolean {
-   return code === $LT || code === $EOF;
function isSpecialFormStart(peek: number, nextPeek: number): boolean {
return peek === $LBRACE && nextPeek != $LBRACE;
}

function isAsciiLetter(code: number): boolean {
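In short, the lexer now keeps a one-character lookahead (nextPeek) plus two flags (inExpansionForm, inExpansionCase) to tell apart a lone '{' that opens an expansion form, '{{ }}' interpolation inside text, and a '}' that closes a case versus the whole form. Below is a stripped-down illustration of that branching; the function and parameter names are hypothetical, and plain strings stand in for the char codes ($LBRACE, $RBRACE, $EQ) used in the real code above.

// Illustration only: the real logic lives in _HtmlTokenizer.tokenize() and isTextEnd() above.
function classifyBrace(peek: string, nextPeek: string, inExpansionCase: boolean,
                       tokenizeExpansionForms: boolean): string {
  if (!tokenizeExpansionForms) return 'text';
  // A lone '{' starts an expansion form; '{{' is interpolation and stays inside a TEXT token.
  if (peek === '{' && nextPeek !== '{') return 'expansion form start';
  if (peek === '=') return 'expansion case start';
  if (peek === '}') return inExpansionCase ? 'expansion case end' : 'expansion form end';
  return 'text';
}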
82 changes: 78 additions & 4 deletions modules/angular2/test/compiler/html_lexer_spec.ts
@@ -576,6 +576,78 @@ export function main() {

});

describe("expansion forms", () => {
it("should parse an expansion form", () => {
expect(tokenizeAndHumanizeParts('{one.two, three, =4 {four} =5 {five} }', true))
.toEqual([
[HtmlTokenType.EXPANSION_FORM_START],
[HtmlTokenType.RAW_TEXT, 'one.two'],
[HtmlTokenType.RAW_TEXT, 'three'],
[HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
[HtmlTokenType.EXPANSION_CASE_EXP_START],
[HtmlTokenType.TEXT, 'four'],
[HtmlTokenType.EXPANSION_CASE_EXP_END],
[HtmlTokenType.EXPANSION_CASE_VALUE, '5'],
[HtmlTokenType.EXPANSION_CASE_EXP_START],
[HtmlTokenType.TEXT, 'five'],
[HtmlTokenType.EXPANSION_CASE_EXP_END],
[HtmlTokenType.EXPANSION_FORM_END],
[HtmlTokenType.EOF]
]);
});

it("should parse an expansion form with text elements surrounding it", () => {
expect(tokenizeAndHumanizeParts('before{one.two, three, =4 {four}}after', true))
.toEqual([
[HtmlTokenType.TEXT, "before"],
[HtmlTokenType.EXPANSION_FORM_START],
[HtmlTokenType.RAW_TEXT, 'one.two'],
[HtmlTokenType.RAW_TEXT, 'three'],
[HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
[HtmlTokenType.EXPANSION_CASE_EXP_START],
[HtmlTokenType.TEXT, 'four'],
[HtmlTokenType.EXPANSION_CASE_EXP_END],
[HtmlTokenType.EXPANSION_FORM_END],
[HtmlTokenType.TEXT, "after"],
[HtmlTokenType.EOF]
]);
});

it("should parse an expansion forms with elements in it", () => {
expect(tokenizeAndHumanizeParts('{one.two, three, =4 {four <b>a</b>}}', true))
.toEqual([
[HtmlTokenType.EXPANSION_FORM_START],
[HtmlTokenType.RAW_TEXT, 'one.two'],
[HtmlTokenType.RAW_TEXT, 'three'],
[HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
[HtmlTokenType.EXPANSION_CASE_EXP_START],
[HtmlTokenType.TEXT, 'four '],
[HtmlTokenType.TAG_OPEN_START, null, 'b'],
[HtmlTokenType.TAG_OPEN_END],
[HtmlTokenType.TEXT, 'a'],
[HtmlTokenType.TAG_CLOSE, null, 'b'],
[HtmlTokenType.EXPANSION_CASE_EXP_END],
[HtmlTokenType.EXPANSION_FORM_END],
[HtmlTokenType.EOF]
]);
});

it("should parse an expansion forms with interpolation in it", () => {
expect(tokenizeAndHumanizeParts('{one.two, three, =4 {four {{a}}}}', true))
.toEqual([
[HtmlTokenType.EXPANSION_FORM_START],
[HtmlTokenType.RAW_TEXT, 'one.two'],
[HtmlTokenType.RAW_TEXT, 'three'],
[HtmlTokenType.EXPANSION_CASE_VALUE, '4'],
[HtmlTokenType.EXPANSION_CASE_EXP_START],
[HtmlTokenType.TEXT, 'four {{a}}'],
[HtmlTokenType.EXPANSION_CASE_EXP_END],
[HtmlTokenType.EXPANSION_FORM_END],
[HtmlTokenType.EOF]
]);
});
});

describe('errors', () => {
it('should include 2 lines of context in message', () => {
let src = "111\n222\n333\nE\n444\n555\n666\n";
@@ -604,17 +676,19 @@ export function main() {
});
}

- function tokenizeWithoutErrors(input: string): HtmlToken[] {
-   var tokenizeResult = tokenizeHtml(input, 'someUrl');
function tokenizeWithoutErrors(input: string,
tokenizeExpansionForms: boolean = false): HtmlToken[] {
var tokenizeResult = tokenizeHtml(input, 'someUrl', tokenizeExpansionForms);
if (tokenizeResult.errors.length > 0) {
var errorString = tokenizeResult.errors.join('\n');
throw new BaseException(`Unexpected parse errors:\n${errorString}`);
}
return tokenizeResult.tokens;
}

- function tokenizeAndHumanizeParts(input: string): any[] {
-   return tokenizeWithoutErrors(input).map(token => [<any>token.type].concat(token.parts));
function tokenizeAndHumanizeParts(input: string, tokenizeExpansionForms: boolean = false): any[] {
return tokenizeWithoutErrors(input, tokenizeExpansionForms)
.map(token => [<any>token.type].concat(token.parts));
}

function tokenizeAndHumanizeSourceSpans(input: string): any[] {
