Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Consider soft keyword for various parsing logic #11474

Merged
merged 1 commit into from
May 27, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
try: ...
except Exception as match: ...
except Exception as case: ...
except Exception as type: ...
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
from match import pattern
from type import bar
from case import pattern
from match.type.case import foo
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
import foo as match
import bar as case
import baz as type
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
match foo:
case case: ...
case match: ...
case type: ...
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
match foo:
case match.bar: ...
case case.bar: ...
case type.bar: ...
case match.case.type.bar.type.case.match: ...
20 changes: 18 additions & 2 deletions crates/ruff_python_parser/src/parser/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ const LITERAL_SET: TokenSet = TokenSet::new([

/// Tokens that represents either an expression or the start of one.
pub(super) const EXPR_SET: TokenSet = TokenSet::new([
TokenKind::Case,
TokenKind::Name,
TokenKind::Minus,
TokenKind::Plus,
Expand Down Expand Up @@ -108,9 +107,24 @@ pub(super) const END_EXPR_SET: TokenSet = TokenSet::new([
const END_SEQUENCE_SET: TokenSet = END_EXPR_SET.remove(TokenKind::Comma);

impl<'src> Parser<'src> {
/// Returns `true` if the parser is at a name or keyword (including soft keyword) token.
pub(super) fn at_name_or_keyword(&self) -> bool {
self.at(TokenKind::Name) || self.current_token_kind().is_keyword()
}

/// Returns `true` if the parser is at a name or soft keyword token.
pub(super) fn at_name_or_soft_keyword(&self) -> bool {
self.at(TokenKind::Name) || self.at_soft_keyword()
}

/// Returns `true` if the parser is at a soft keyword token.
pub(super) fn at_soft_keyword(&self) -> bool {
self.current_token_kind().is_soft_keyword()
}

/// Returns `true` if the current token is the start of an expression.
pub(super) fn at_expr(&self) -> bool {
self.at_ts(EXPR_SET)
self.at_ts(EXPR_SET) || self.at_soft_keyword()
}

/// Returns `true` if the current token ends a sequence.
Expand Down Expand Up @@ -1434,6 +1448,8 @@ impl<'src> Parser<'src> {
ParseErrorType::FStringError(FStringErrorType::InvalidConversionFlag),
conversion_flag_range,
);
// TODO(dhruvmanila): Avoid dropping this token
self.bump_any();
ConversionFlag::None
}
} else {
Expand Down
18 changes: 10 additions & 8 deletions crates/ruff_python_parser/src/parser/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ impl<'src> Parser<'src> {
///
/// If the current token is not a soft keyword.
pub(crate) fn bump_soft_keyword_as_name(&mut self) {
assert!(self.current_token_kind().is_soft_keyword());
assert!(self.at_soft_keyword());

self.do_bump(TokenKind::Name);
}
Expand Down Expand Up @@ -1050,9 +1050,9 @@ impl RecoveryContextKind {
RecoveryContextKind::Except => p.at(TokenKind::Except),
RecoveryContextKind::AssignmentTargets => p.at(TokenKind::Equal),
RecoveryContextKind::TypeParams => p.at_type_param(),
RecoveryContextKind::ImportNames => p.at(TokenKind::Name),
RecoveryContextKind::ImportNames => p.at_name_or_soft_keyword(),
RecoveryContextKind::ImportFromAsNames(_) => {
matches!(p.current_token_kind(), TokenKind::Star | TokenKind::Name)
p.at(TokenKind::Star) || p.at_name_or_soft_keyword()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I would just call p.at_name_or_soft_keyword p.at_name to align it with parse_name. The downside is that this can be confusing because there's also a Name token. Could we rename parse_name to parse_identifier?

Copy link
Member Author

@dhruvmanila dhruvmanila May 27, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I would just call p.at_name_or_soft_keyword p.at_name to align it with parse_name. The downside is that this can be confusing because there's also a Name token.

Yeah, I thought of doing that but I found the explicit name better just to avoid this confusion.

Could we rename parse_name to parse_identifier?

They both exists because sometimes the parser needs to construct an ExprName node while other times it only needs an Identifier.

}
RecoveryContextKind::Slices => p.at(TokenKind::Colon) || p.at_expr(),
RecoveryContextKind::ListElements
Expand All @@ -1071,11 +1071,13 @@ impl RecoveryContextKind {
RecoveryContextKind::MatchPatternClassArguments => p.at_pattern_start(),
RecoveryContextKind::Arguments => p.at_expr(),
RecoveryContextKind::DeleteTargets => p.at_expr(),
RecoveryContextKind::Identifiers => p.at(TokenKind::Name),
RecoveryContextKind::Parameters(_) => matches!(
p.current_token_kind(),
TokenKind::Name | TokenKind::Star | TokenKind::DoubleStar | TokenKind::Slash
),
RecoveryContextKind::Identifiers => p.at_name_or_soft_keyword(),
RecoveryContextKind::Parameters(_) => {
matches!(
p.current_token_kind(),
TokenKind::Star | TokenKind::DoubleStar | TokenKind::Slash
) || p.at_name_or_soft_keyword()
}
RecoveryContextKind::WithItems(_) => p.at_expr(),
RecoveryContextKind::FStringElements => matches!(
p.current_token_kind(),
Expand Down
97 changes: 44 additions & 53 deletions crates/ruff_python_parser/src/parser/pattern.rs
Original file line number Diff line number Diff line change
Expand Up @@ -51,12 +51,12 @@ const MAPPING_PATTERN_START_SET: TokenSet = TokenSet::new([
impl<'src> Parser<'src> {
/// Returns `true` if the current token is a valid start of a pattern.
pub(super) fn at_pattern_start(&self) -> bool {
self.at_ts(PATTERN_START_SET)
self.at_ts(PATTERN_START_SET) || self.at_soft_keyword()
}

/// Returns `true` if the current token is a valid start of a mapping pattern.
pub(super) fn at_mapping_pattern_start(&self) -> bool {
self.at_ts(MAPPING_PATTERN_START_SET)
self.at_ts(MAPPING_PATTERN_START_SET) || self.at_soft_keyword()
}

/// Entry point to start parsing a pattern.
Expand Down Expand Up @@ -439,46 +439,6 @@ impl<'src> Parser<'src> {
range,
})
}
TokenKind::Name if self.peek() == TokenKind::Dot => {
let TokenValue::Name(name) = self.bump_value(TokenKind::Name) else {
unreachable!()
};
let id = Expr::Name(ast::ExprName {
id: name.to_string(),
ctx: ExprContext::Load,
range: self.node_range(start),
});

let attribute = self.parse_attr_expr_for_match_pattern(id, start);

Pattern::MatchValue(ast::PatternMatchValue {
value: Box::new(attribute),
range: self.node_range(start),
})
}
TokenKind::Name => {
let TokenValue::Name(name) = self.bump_value(TokenKind::Name) else {
unreachable!()
};
let range = self.node_range(start);

// test_ok match_as_pattern
// match foo:
// case foo_bar: ...
// case _: ...
Pattern::MatchAs(ast::PatternMatchAs {
range,
pattern: None,
name: if &*name == "_" {
None
} else {
Some(ast::Identifier {
id: name.to_string(),
range,
})
},
})
}
kind => {
// The `+` is only for better error recovery.
if let Some(unary_arithmetic_op) = kind.as_unary_arithmetic_operator() {
Expand Down Expand Up @@ -507,26 +467,57 @@ impl<'src> Parser<'src> {
}
}

// Upon encountering an unexpected token, return a `Pattern::MatchValue` containing
// an empty `Expr::Name`.
let invalid_node = if kind.is_keyword() {
Expr::Name(self.parse_name())
if self.at_name_or_keyword() {
if self.peek() == TokenKind::Dot {
// test_ok match_attr_pattern_soft_keyword
// match foo:
// case match.bar: ...
// case case.bar: ...
// case type.bar: ...
// case match.case.type.bar.type.case.match: ...
let id = Expr::Name(self.parse_name());

let attribute = self.parse_attr_expr_for_match_pattern(id, start);

Pattern::MatchValue(ast::PatternMatchValue {
value: Box::new(attribute),
range: self.node_range(start),
})
} else {
// test_ok match_as_pattern_soft_keyword
// match foo:
// case case: ...
// case match: ...
// case type: ...
let ident = self.parse_identifier();

// test_ok match_as_pattern
// match foo:
// case foo_bar: ...
// case _: ...
Pattern::MatchAs(ast::PatternMatchAs {
range: ident.range,
pattern: None,
name: if &ident == "_" { None } else { Some(ident) },
})
}
} else {
// Upon encountering an unexpected token, return a `Pattern::MatchValue` containing
// an empty `Expr::Name`.
self.add_error(
ParseErrorType::OtherError("Expected a pattern".to_string()),
self.current_token_range(),
);
Expr::Name(ast::ExprName {
let invalid_node = Expr::Name(ast::ExprName {
range: self.missing_node_range(),
id: String::new(),
ctx: ExprContext::Invalid,
});
Pattern::MatchValue(ast::PatternMatchValue {
range: invalid_node.range(),
value: Box::new(invalid_node),
})
};

Pattern::MatchValue(ast::PatternMatchValue {
range: invalid_node.range(),
value: Box::new(invalid_node),
})
}
}
}
}
Expand Down
28 changes: 21 additions & 7 deletions crates/ruff_python_parser/src/parser/statement.rs
Original file line number Diff line number Diff line change
Expand Up @@ -85,13 +85,13 @@ impl<'src> Parser<'src> {
/// Returns `true` if the current token is the start of a simple statement,
/// including expressions.
fn at_simple_stmt(&self) -> bool {
self.at_ts(SIMPLE_STMT_WITH_EXPR_SET)
self.at_ts(SIMPLE_STMT_WITH_EXPR_SET) || self.at_soft_keyword()
}

/// Returns `true` if the current token is the start of a simple, compound or expression
/// statement.
pub(super) fn at_stmt(&self) -> bool {
self.at_ts(STMTS_SET)
self.at_ts(STMTS_SET) || self.at_soft_keyword()
}

/// Checks if the parser is currently positioned at the start of a type parameter.
Expand Down Expand Up @@ -528,7 +528,12 @@ impl<'src> Parser<'src> {
}
}

let module = if self.at(TokenKind::Name) {
let module = if self.at_name_or_soft_keyword() {
// test_ok from_import_soft_keyword_module_name
// from match import pattern
// from type import bar
// from case import pattern
// from match.type.case import foo
Some(self.parse_dotted_name())
} else {
if leading_dots == 0 {
Expand Down Expand Up @@ -633,7 +638,11 @@ impl<'src> Parser<'src> {
};

let asname = if self.eat(TokenKind::As) {
if self.at(TokenKind::Name) {
if self.at_name_or_soft_keyword() {
// test_ok import_as_name_soft_keyword
// import foo as match
// import bar as case
// import baz as type
Some(self.parse_identifier())
} else {
// test_err import_alias_missing_asname
Expand Down Expand Up @@ -1500,7 +1509,12 @@ impl<'src> Parser<'src> {
};

let name = if self.eat(TokenKind::As) {
if self.at(TokenKind::Name) {
if self.at_name_or_soft_keyword() {
// test_ok except_stmt_as_name_soft_keyword
// try: ...
// except Exception as match: ...
// except Exception as case: ...
// except Exception as type: ...
Some(self.parse_identifier())
} else {
// test_err except_stmt_missing_as_name
Expand Down Expand Up @@ -3008,7 +3022,7 @@ impl<'src> Parser<'src> {
let star_range = parser.current_token_range();
parser.bump(TokenKind::Star);

if parser.at(TokenKind::Name) {
if parser.at_name_or_soft_keyword() {
let param = parser.parse_parameter(param_start, function_kind, AllowStarAnnotation::Yes);
let param_star_range = parser.node_range(star_range.start());

Expand Down Expand Up @@ -3167,7 +3181,7 @@ impl<'src> Parser<'src> {

last_keyword_only_separator_range = None;
}
TokenKind::Name => {
_ if parser.at_name_or_soft_keyword() => {
let param = parser.parse_parameter_with_default(param_start, function_kind);

// TODO(dhruvmanila): Pyright seems to only highlight the first non-default argument
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,14 +42,14 @@ Module(
),
],
patterns: [
MatchValue(
PatternMatchValue {
MatchAs(
PatternMatchAs {
range: 164..166,
value: Name(
ExprName {
range: 164..166,
pattern: None,
name: Some(
Identifier {
id: "as",
ctx: Load,
range: 164..166,
},
),
},
Expand Down
Loading
Loading