Skip to content

Commit

Permalink
Consider soft keyword for various parsing logic (#11474)
Browse files Browse the repository at this point in the history
## Summary

This PR updates various checks to consider soft keywords.

1. Update the following methods to also check whether the parser is at a
soft keyword token:
	1. `at_simple_stmt`
	2. `at_stmt`
	3. `at_expr`
	4. `at_pattern_start`
	5. `at_mapping_pattern_start`
2. Update various references of `parser.at(TokenKind::Name)` to use
`parser.at_name_or_soft_keyword`. In the future, we should update it to
also include other keywords on a case-by-case basis.
3. Re-use `parse_name` and `parse_identifier` for literal pattern
parsing. We should always use either of these methods instead of doing
the same manually to make sure that soft keyword are correctly
tranformed.

For (i) and (ii), we could've just extended the `EXPR_SET` with the soft
keyword tokens but I think it's better to be explicit about what the
method checks and to have separation between the token set corresponding
to statement and soft keywords.

## Test Plan

Add a few test cases and update the snapshots. These snapshots were
generated through the local fork of the parser which compiles.
  • Loading branch information
dhruvmanila committed May 27, 2024
1 parent c393e70 commit 306f702
Show file tree
Hide file tree
Showing 15 changed files with 774 additions and 76 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
try: ...
except Exception as match: ...
except Exception as case: ...
except Exception as type: ...
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from match import pattern
from type import bar
from case import pattern
from match.type.case import foo
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import foo as match
import bar as case
import baz as type
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
match foo:
case case: ...
case match: ...
case type: ...
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
match foo:
case match.bar: ...
case case.bar: ...
case type.bar: ...
case match.case.type.bar.type.case.match: ...
20 changes: 18 additions & 2 deletions crates/ruff_python_parser/src/parser/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ const LITERAL_SET: TokenSet = TokenSet::new([

/// Tokens that represents either an expression or the start of one.
pub(super) const EXPR_SET: TokenSet = TokenSet::new([
TokenKind::Case,
TokenKind::Name,
TokenKind::Minus,
TokenKind::Plus,
Expand Down Expand Up @@ -108,9 +107,24 @@ pub(super) const END_EXPR_SET: TokenSet = TokenSet::new([
const END_SEQUENCE_SET: TokenSet = END_EXPR_SET.remove(TokenKind::Comma);

impl<'src> Parser<'src> {
/// Returns `true` if the parser is at a name or keyword (including soft keyword) token.
pub(super) fn at_name_or_keyword(&self) -> bool {
self.at(TokenKind::Name) || self.current_token_kind().is_keyword()
}

/// Returns `true` if the parser is at a name or soft keyword token.
pub(super) fn at_name_or_soft_keyword(&self) -> bool {
self.at(TokenKind::Name) || self.at_soft_keyword()
}

/// Returns `true` if the parser is at a soft keyword token.
pub(super) fn at_soft_keyword(&self) -> bool {
self.current_token_kind().is_soft_keyword()
}

/// Returns `true` if the current token is the start of an expression.
pub(super) fn at_expr(&self) -> bool {
self.at_ts(EXPR_SET)
self.at_ts(EXPR_SET) || self.at_soft_keyword()
}

/// Returns `true` if the current token ends a sequence.
Expand Down Expand Up @@ -1434,6 +1448,8 @@ impl<'src> Parser<'src> {
ParseErrorType::FStringError(FStringErrorType::InvalidConversionFlag),
conversion_flag_range,
);
// TODO(dhruvmanila): Avoid dropping this token
self.bump_any();
ConversionFlag::None
}
} else {
Expand Down
18 changes: 10 additions & 8 deletions crates/ruff_python_parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ impl<'src> Parser<'src> {
///
/// If the current token is not a soft keyword.
pub(crate) fn bump_soft_keyword_as_name(&mut self) {
assert!(self.current_token_kind().is_soft_keyword());
assert!(self.at_soft_keyword());

self.do_bump(TokenKind::Name);
}
Expand Down Expand Up @@ -1050,9 +1050,9 @@ impl RecoveryContextKind {
RecoveryContextKind::Except => p.at(TokenKind::Except),
RecoveryContextKind::AssignmentTargets => p.at(TokenKind::Equal),
RecoveryContextKind::TypeParams => p.at_type_param(),
RecoveryContextKind::ImportNames => p.at(TokenKind::Name),
RecoveryContextKind::ImportNames => p.at_name_or_soft_keyword(),
RecoveryContextKind::ImportFromAsNames(_) => {
matches!(p.current_token_kind(), TokenKind::Star | TokenKind::Name)
p.at(TokenKind::Star) || p.at_name_or_soft_keyword()
}
RecoveryContextKind::Slices => p.at(TokenKind::Colon) || p.at_expr(),
RecoveryContextKind::ListElements
Expand All @@ -1071,11 +1071,13 @@ impl RecoveryContextKind {
RecoveryContextKind::MatchPatternClassArguments => p.at_pattern_start(),
RecoveryContextKind::Arguments => p.at_expr(),
RecoveryContextKind::DeleteTargets => p.at_expr(),
RecoveryContextKind::Identifiers => p.at(TokenKind::Name),
RecoveryContextKind::Parameters(_) => matches!(
p.current_token_kind(),
TokenKind::Name | TokenKind::Star | TokenKind::DoubleStar | TokenKind::Slash
),
RecoveryContextKind::Identifiers => p.at_name_or_soft_keyword(),
RecoveryContextKind::Parameters(_) => {
matches!(
p.current_token_kind(),
TokenKind::Star | TokenKind::DoubleStar | TokenKind::Slash
) || p.at_name_or_soft_keyword()
}
RecoveryContextKind::WithItems(_) => p.at_expr(),
RecoveryContextKind::FStringElements => matches!(
p.current_token_kind(),
Expand Down
97 changes: 44 additions & 53 deletions crates/ruff_python_parser/src/parser/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@ const MAPPING_PATTERN_START_SET: TokenSet = TokenSet::new([
impl<'src> Parser<'src> {
/// Returns `true` if the current token is a valid start of a pattern.
pub(super) fn at_pattern_start(&self) -> bool {
self.at_ts(PATTERN_START_SET)
self.at_ts(PATTERN_START_SET) || self.at_soft_keyword()
}

/// Returns `true` if the current token is a valid start of a mapping pattern.
pub(super) fn at_mapping_pattern_start(&self) -> bool {
self.at_ts(MAPPING_PATTERN_START_SET)
self.at_ts(MAPPING_PATTERN_START_SET) || self.at_soft_keyword()
}

/// Entry point to start parsing a pattern.
Expand Down Expand Up @@ -439,46 +439,6 @@ impl<'src> Parser<'src> {
range,
})
}
TokenKind::Name if self.peek() == TokenKind::Dot => {
let TokenValue::Name(name) = self.bump_value(TokenKind::Name) else {
unreachable!()
};
let id = Expr::Name(ast::ExprName {
id: name.to_string(),
ctx: ExprContext::Load,
range: self.node_range(start),
});

let attribute = self.parse_attr_expr_for_match_pattern(id, start);

Pattern::MatchValue(ast::PatternMatchValue {
value: Box::new(attribute),
range: self.node_range(start),
})
}
TokenKind::Name => {
let TokenValue::Name(name) = self.bump_value(TokenKind::Name) else {
unreachable!()
};
let range = self.node_range(start);

// test_ok match_as_pattern
// match foo:
// case foo_bar: ...
// case _: ...
Pattern::MatchAs(ast::PatternMatchAs {
range,
pattern: None,
name: if &*name == "_" {
None
} else {
Some(ast::Identifier {
id: name.to_string(),
range,
})
},
})
}
kind => {
// The `+` is only for better error recovery.
if let Some(unary_arithmetic_op) = kind.as_unary_arithmetic_operator() {
Expand Down Expand Up @@ -507,26 +467,57 @@ impl<'src> Parser<'src> {
}
}

// Upon encountering an unexpected token, return a `Pattern::MatchValue` containing
// an empty `Expr::Name`.
let invalid_node = if kind.is_keyword() {
Expr::Name(self.parse_name())
if self.at_name_or_keyword() {
if self.peek() == TokenKind::Dot {
// test_ok match_attr_pattern_soft_keyword
// match foo:
// case match.bar: ...
// case case.bar: ...
// case type.bar: ...
// case match.case.type.bar.type.case.match: ...
let id = Expr::Name(self.parse_name());

let attribute = self.parse_attr_expr_for_match_pattern(id, start);

Pattern::MatchValue(ast::PatternMatchValue {
value: Box::new(attribute),
range: self.node_range(start),
})
} else {
// test_ok match_as_pattern_soft_keyword
// match foo:
// case case: ...
// case match: ...
// case type: ...
let ident = self.parse_identifier();

// test_ok match_as_pattern
// match foo:
// case foo_bar: ...
// case _: ...
Pattern::MatchAs(ast::PatternMatchAs {
range: ident.range,
pattern: None,
name: if &ident == "_" { None } else { Some(ident) },
})
}
} else {
// Upon encountering an unexpected token, return a `Pattern::MatchValue` containing
// an empty `Expr::Name`.
self.add_error(
ParseErrorType::OtherError("Expected a pattern".to_string()),
self.current_token_range(),
);
Expr::Name(ast::ExprName {
let invalid_node = Expr::Name(ast::ExprName {
range: self.missing_node_range(),
id: String::new(),
ctx: ExprContext::Invalid,
});
Pattern::MatchValue(ast::PatternMatchValue {
range: invalid_node.range(),
value: Box::new(invalid_node),
})
};

Pattern::MatchValue(ast::PatternMatchValue {
range: invalid_node.range(),
value: Box::new(invalid_node),
})
}
}
}
}
Expand Down
28 changes: 21 additions & 7 deletions crates/ruff_python_parser/src/parser/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,13 @@ impl<'src> Parser<'src> {
/// Returns `true` if the current token is the start of a simple statement,
/// including expressions.
fn at_simple_stmt(&self) -> bool {
self.at_ts(SIMPLE_STMT_WITH_EXPR_SET)
self.at_ts(SIMPLE_STMT_WITH_EXPR_SET) || self.at_soft_keyword()
}

/// Returns `true` if the current token is the start of a simple, compound or expression
/// statement.
pub(super) fn at_stmt(&self) -> bool {
self.at_ts(STMTS_SET)
self.at_ts(STMTS_SET) || self.at_soft_keyword()
}

/// Checks if the parser is currently positioned at the start of a type parameter.
Expand Down Expand Up @@ -528,7 +528,12 @@ impl<'src> Parser<'src> {
}
}

let module = if self.at(TokenKind::Name) {
let module = if self.at_name_or_soft_keyword() {
// test_ok from_import_soft_keyword_module_name
// from match import pattern
// from type import bar
// from case import pattern
// from match.type.case import foo
Some(self.parse_dotted_name())
} else {
if leading_dots == 0 {
Expand Down Expand Up @@ -633,7 +638,11 @@ impl<'src> Parser<'src> {
};

let asname = if self.eat(TokenKind::As) {
if self.at(TokenKind::Name) {
if self.at_name_or_soft_keyword() {
// test_ok import_as_name_soft_keyword
// import foo as match
// import bar as case
// import baz as type
Some(self.parse_identifier())
} else {
// test_err import_alias_missing_asname
Expand Down Expand Up @@ -1500,7 +1509,12 @@ impl<'src> Parser<'src> {
};

let name = if self.eat(TokenKind::As) {
if self.at(TokenKind::Name) {
if self.at_name_or_soft_keyword() {
// test_ok except_stmt_as_name_soft_keyword
// try: ...
// except Exception as match: ...
// except Exception as case: ...
// except Exception as type: ...
Some(self.parse_identifier())
} else {
// test_err except_stmt_missing_as_name
Expand Down Expand Up @@ -3008,7 +3022,7 @@ impl<'src> Parser<'src> {
let star_range = parser.current_token_range();
parser.bump(TokenKind::Star);

if parser.at(TokenKind::Name) {
if parser.at_name_or_soft_keyword() {
let param = parser.parse_parameter(param_start, function_kind, AllowStarAnnotation::Yes);
let param_star_range = parser.node_range(star_range.start());

Expand Down Expand Up @@ -3167,7 +3181,7 @@ impl<'src> Parser<'src> {

last_keyword_only_separator_range = None;
}
TokenKind::Name => {
_ if parser.at_name_or_soft_keyword() => {
let param = parser.parse_parameter_with_default(param_start, function_kind);

// TODO(dhruvmanila): Pyright seems to only highlight the first non-default argument
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ Module(
),
],
patterns: [
MatchValue(
PatternMatchValue {
MatchAs(
PatternMatchAs {
range: 164..166,
value: Name(
ExprName {
range: 164..166,
pattern: None,
name: Some(
Identifier {
id: "as",
ctx: Load,
range: 164..166,
},
),
},
Expand Down
Loading

0 comments on commit 306f702

Please sign in to comment.