Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(compiler): Recover on malformed keyed reads and keyed writes #39004

Closed
wants to merge 5 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
98 changes: 72 additions & 26 deletions packages/compiler/src/expression_parser/parser.ts
Expand Up @@ -288,10 +288,24 @@ export class IvyParser extends Parser {
simpleExpressionChecker = IvySimpleExpressionChecker; //
}

/** Describes a stateful context an expression parser is in. */
enum ParseContextFlags {
None = 0,
/**
* A Writable context is one in which a value may be written to an lvalue.
* For example, after we see a property access, we may expect a write to the
* property via the "=" operator.
* prop
* ^ possible "=" after
*/
Writable = 1,
}

export class _ParseAST {
private rparensExpected = 0;
private rbracketsExpected = 0;
private rbracesExpected = 0;
private context = ParseContextFlags.None;

// Cache of expression start and input indeces to the absolute source span they map to, used to
// prevent creating superfluous source spans in `sourceSpan`.
Expand Down Expand Up @@ -368,6 +382,16 @@ export class _ParseAST {
this.index++;
}

/**
* Executes a callback in the provided context.
*/
private withContext<T>(context: ParseContextFlags, cb: () => T): T {
this.context |= context;
const ret = cb();
this.context ^= context;
return ret;
}

consumeOptionalCharacter(code: number): boolean {
if (this.next.isCharacter(code)) {
this.advance();
Expand All @@ -384,6 +408,12 @@ export class _ParseAST {
return this.next.isKeywordAs();
}

/**
* Consumes an expected character, otherwise emits an error about the missing expected character
* and skips over the token stream until reaching a recoverable point.
*
* See `this.error` and `this.skip` for more details.
*/
expectCharacter(code: number) {
if (this.consumeOptionalCharacter(code)) return;
this.error(`Missing expected ${String.fromCharCode(code)}`);
Expand Down Expand Up @@ -631,18 +661,23 @@ export class _ParseAST {
result = this.parseAccessMemberOrMethodCall(result, true);

} else if (this.consumeOptionalCharacter(chars.$LBRACKET)) {
this.rbracketsExpected++;
const key = this.parsePipe();
this.rbracketsExpected--;
this.expectCharacter(chars.$RBRACKET);
if (this.consumeOptionalOperator('=')) {
const value = this.parseConditional();
result = new KeyedWrite(
this.span(resultStart), this.sourceSpan(resultStart), result, key, value);
} else {
result = new KeyedRead(this.span(resultStart), this.sourceSpan(resultStart), result, key);
}

this.withContext(ParseContextFlags.Writable, () => {
this.rbracketsExpected++;
const key = this.parsePipe();
if (key instanceof EmptyExpr) {
this.error(`Key access cannot be empty`);
}
this.rbracketsExpected--;
ayazhafiz marked this conversation as resolved.
Show resolved Hide resolved
this.expectCharacter(chars.$RBRACKET);
if (this.consumeOptionalOperator('=')) {
const value = this.parseConditional();
result = new KeyedWrite(
this.span(resultStart), this.sourceSpan(resultStart), result, key, value);
} else {
result =
new KeyedRead(this.span(resultStart), this.sourceSpan(resultStart), result, key);
}
});
} else if (this.consumeOptionalCharacter(chars.$LPAREN)) {
this.rparensExpected++;
const args = this.parseCallArguments();
Expand Down Expand Up @@ -994,6 +1029,10 @@ export class _ParseAST {
this.consumeOptionalCharacter(chars.$SEMICOLON) || this.consumeOptionalCharacter(chars.$COMMA);
}

/**
* Records an error and skips over the token stream until reaching a recoverable point. See
* `this.skip` for more details on token skipping.
*/
error(message: string, index: number|null = null) {
this.errors.push(new ParserError(message, this.input, this.locationText(index), this.location));
this.skip();
Expand All @@ -1005,25 +1044,32 @@ export class _ParseAST {
`at the end of the expression`;
}

// Error recovery should skip tokens until it encounters a recovery point. skip() treats
// the end of input and a ';' as unconditionally a recovery point. It also treats ')',
// '}' and ']' as conditional recovery points if one of calling productions is expecting
// one of these symbols. This allows skip() to recover from errors such as '(a.) + 1' allowing
// more of the AST to be retained (it doesn't skip any tokens as the ')' is retained because
// of the '(' begins an '(' <expr> ')' production). The recovery points of grouping symbols
// must be conditional as they must be skipped if none of the calling productions are not
// expecting the closing token else we will never make progress in the case of an
// extraneous group closing symbol (such as a stray ')'). This is not the case for ';' because
// parseChain() is always the root production and it expects a ';'.

// If a production expects one of these token it increments the corresponding nesting count,
// and then decrements it just prior to checking if the token is in the input.
/**
* Error recovery should skip tokens until it encounters a recovery point. skip() treats
* the end of input and a ';' as unconditionally a recovery point. It also treats ')',
* '}' and ']' as conditional recovery points if one of calling productions is expecting
* one of these symbols. This allows skip() to recover from errors such as '(a.) + 1' allowing
* more of the AST to be retained (it doesn't skip any tokens as the ')' is retained because
* of the '(' begins an '(' <expr> ')' production). The recovery points of grouping symbols
* must be conditional as they must be skipped if none of the calling productions are not
* expecting the closing token else we will never make progress in the case of an
* extraneous group closing symbol (such as a stray ')'). This is not the case for ';' because
* parseChain() is always the root production and it expects a ';'.
*
* Furthermore, the presence of a stateful context can add more recovery points.
* - in a `Writable` context, we are able to recover after seeing the `=` operator, which
* signals the presence of an independent rvalue expression following the `=` operator.
*
* If a production expects one of these token it increments the corresponding nesting count,
* and then decrements it just prior to checking if the token is in the input.
ayazhafiz marked this conversation as resolved.
Show resolved Hide resolved
*/
private skip() {
let n = this.next;
while (this.index < this.tokens.length && !n.isCharacter(chars.$SEMICOLON) &&
(this.rparensExpected <= 0 || !n.isCharacter(chars.$RPAREN)) &&
(this.rbracesExpected <= 0 || !n.isCharacter(chars.$RBRACE)) &&
(this.rbracketsExpected <= 0 || !n.isCharacter(chars.$RBRACKET))) {
(this.rbracketsExpected <= 0 || !n.isCharacter(chars.$RBRACKET)) &&
(!(this.context & ParseContextFlags.Writable) || !n.isOperator('='))) {
if (this.next.isError()) {
this.errors.push(
new ParserError(this.next.toString()!, this.input, this.locationText(), this.location));
Expand Down
87 changes: 87 additions & 0 deletions packages/compiler/test/expression_parser/parser_spec.ts
Expand Up @@ -154,6 +154,84 @@ describe('parser', () => {
});
});

describe('keyed read', () => {
it('should parse keyed reads', () => {
checkAction('a["a"]');
checkAction('this.a["a"]', 'a["a"]');
checkAction('a.a["a"]');
});

describe('malformed keyed reads', () => {
it('should recover on missing keys', () => {
checkActionWithError('a[]', 'a[]', 'Key access cannot be empty');
});

it('should recover on incomplete expression keys', () => {
checkActionWithError('a[1 + ]', 'a[1 + ]', 'Unexpected token ]');
});

it('should recover on unterminated keys', () => {
checkActionWithError(
'a[1 + 2', 'a[1 + 2]', 'Missing expected ] at the end of the expression');
});

it('should recover on incomplete and unterminated keys', () => {
checkActionWithError(
'a[1 + ', 'a[1 + ]', 'Missing expected ] at the end of the expression');
});
});
});
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I feel like it would almost be worth adding these to a separate test(compiler) commit before the changes to the code to make it clear that we were already able to recover from errors in keyed reads and that the changes only affect keyed writes.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Makes sense, I will do this.


describe('keyed write', () => {
it('should parse keyed writes', () => {
checkAction('a["a"] = 1 + 2');
checkAction('this.a["a"] = 1 + 2', 'a["a"] = 1 + 2');
checkAction('a.a["a"] = 1 + 2');
});

describe('malformed keyed writes', () => {
it('should recover on empty rvalues', () => {
checkActionWithError('a["a"] = ', 'a["a"] = ', 'Unexpected end of expression');
});

it('should recover on incomplete rvalues', () => {
checkActionWithError('a["a"] = 1 + ', 'a["a"] = 1 + ', 'Unexpected end of expression');
});

it('should recover on missing keys', () => {
checkActionWithError('a[] = 1', 'a[] = 1', 'Key access cannot be empty');
});

it('should recover on incomplete expression keys', () => {
checkActionWithError('a[1 + ] = 1', 'a[1 + ] = 1', 'Unexpected token ]');
});

it('should recover on unterminated keys', () => {
checkActionWithError('a[1 + 2 = 1', 'a[1 + 2] = 1', 'Missing expected ]');
});

it('should recover on incomplete and unterminated keys', () => {
const ast = parseAction('a[1 + = 1');
expect(unparse(ast)).toEqual('a[1 + ] = 1');
validate(ast);

const errors = ast.errors.map(e => e.message);
expect(errors.length).toBe(2);
expect(errors[0]).toContain('Unexpected token =');
expect(errors[1]).toContain('Missing expected ]');
Comment on lines +214 to +221
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think it is okay to keep both errors here, but an argument could be made that they are overlapping. Let me know what you think.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think having both is fine.

});

it('should error on writes after a keyed write', () => {
const ast = parseAction('a[1] = 1 = 2');
expect(unparse(ast)).toEqual('a[1] = 1');
validate(ast);

expect(ast.errors.length).toBe(1);
expect(ast.errors[0].message).toContain('Unexpected token \'=\'');
});
});
});

describe('conditional', () => {
it('should parse ternary/conditional expressions', () => {
checkAction('7 == 3 + 4 ? 10 : 20');
Expand Down Expand Up @@ -926,3 +1004,12 @@ function expectActionError(text: string, message: string) {
function expectBindingError(text: string, message: string) {
expectError(validate(parseBinding(text)), message);
}

/**
* Check that an malformed action parses to a recovered AST while emitting an
* error.
*/
function checkActionWithError(text: string, expected: string, error: string) {
checkAction(text, expected);
expectActionError(text, error);
}