Avoid match statement misidentification in token rules (#3129)
charliermarsh committed Feb 22, 2023
1 parent df3932f commit 1efa2e0
Showing 25 changed files with 128 additions and 66 deletions.
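The fix itself is mechanical: the bumped rustpython-parser revision requires an explicit `Mode` for its lexer entry points, and every call site in Ruff now passes `Mode::Module`, which (per the commit title) is what lets token-based rules stop treating `match` used as an ordinary identifier as the start of a `match` statement. A minimal sketch of the new call shape, assuming only the rustpython-parser API pinned below (`count_comments` is a hypothetical helper, not part of this commit):

```rust
use rustpython_parser::lexer;
use rustpython_parser::lexer::Tok;
use rustpython_parser::mode::Mode;

/// Hypothetical helper: count comment tokens in a snippet using the updated
/// lexer signature. The only change from the old API is the `Mode` argument.
fn count_comments(contents: &str) -> usize {
    lexer::make_tokenizer(contents, Mode::Module)
        .flatten() // drop lexer errors, keeping `(start, tok, end)` triples
        .filter(|(_, tok, _)| matches!(tok, Tok::Comment(..)))
        .count()
}
```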
8 changes: 4 additions & 4 deletions Cargo.lock

Some generated files are not rendered by default.

4 changes: 2 additions & 2 deletions Cargo.toml
@@ -13,8 +13,8 @@ libcst = { git = "https://github.com/charliermarsh/LibCST", rev = "f2f0b7a487a87
once_cell = { version = "1.16.0" }
regex = { version = "1.6.0" }
rustc-hash = { version = "1.1.0" }
rustpython-common = { git = "https://github.com/RustPython/RustPython.git", rev = "ddf497623ae56d21aa4166ff1c0725a7db67e955" }
rustpython-parser = { features = ["lalrpop"], git = "https://github.com/RustPython/RustPython.git", rev = "ddf497623ae56d21aa4166ff1c0725a7db67e955" }
rustpython-common = { git = "https://github.com/RustPython/RustPython.git", rev = "6d5bbd913c7c46518f4ed8b1b378ffb0df72f505" }
rustpython-parser = { features = ["lalrpop"], git = "https://github.com/RustPython/RustPython.git", rev = "6d5bbd913c7c46518f4ed8b1b378ffb0df72f505" }
schemars = { version = "0.8.11" }
serde = { version = "1.0.147", features = ["derive"] }
serde_json = { version = "1.0.87" }
3 changes: 3 additions & 0 deletions crates/ruff/resources/test/fixtures/pycodestyle/E70.py
@@ -57,3 +57,6 @@ class C: ...; ...
#: E701:2:12
match *0, 1, *2:
case 0,: y = 0
#:
class Foo:
    match: Optional[Match] = None
24 changes: 14 additions & 10 deletions crates/ruff/src/ast/helpers.rs
@@ -11,6 +11,7 @@ use rustpython_parser::ast::{
};
use rustpython_parser::lexer;
use rustpython_parser::lexer::Tok;
use rustpython_parser::mode::Mode;
use rustpython_parser::token::StringKind;
use smallvec::{smallvec, SmallVec};

@@ -655,7 +656,7 @@ pub fn has_comments<T>(located: &Located<T>, locator: &Locator) -> bool {

/// Returns `true` if a [`Range`] includes at least one comment.
pub fn has_comments_in(range: Range, locator: &Locator) -> bool {
for tok in lexer::make_tokenizer(locator.slice(&range)) {
for tok in lexer::make_tokenizer_located(locator.slice(&range), Mode::Module, range.location) {
match tok {
Ok((_, tok, _)) => {
if matches!(tok, Tok::Comment(..)) {
@@ -870,7 +871,8 @@ pub fn match_parens(start: Location, locator: &Locator) -> Option<Range> {
let mut fix_start = None;
let mut fix_end = None;
let mut count: usize = 0;
for (start, tok, end) in lexer::make_tokenizer_located(contents, start).flatten() {
for (start, tok, end) in lexer::make_tokenizer_located(contents, Mode::Module, start).flatten()
{
if matches!(tok, Tok::Lpar) {
if count == 0 {
fix_start = Some(start);
@@ -902,7 +904,9 @@ pub fn identifier_range(stmt: &Stmt, locator: &Locator) -> Range {
| StmtKind::AsyncFunctionDef { .. }
) {
let contents = locator.slice(&Range::from_located(stmt));
for (start, tok, end) in lexer::make_tokenizer_located(contents, stmt.location).flatten() {
for (start, tok, end) in
lexer::make_tokenizer_located(contents, Mode::Module, stmt.location).flatten()
{
if matches!(tok, Tok::Name { .. }) {
return Range::new(start, end);
}
@@ -933,7 +937,7 @@ pub fn find_names<'a, T, U>(
locator: &'a Locator,
) -> impl Iterator<Item = Range> + 'a {
let contents = locator.slice(&Range::from_located(located));
lexer::make_tokenizer_located(contents, located.location)
lexer::make_tokenizer_located(contents, Mode::Module, located.location)
.flatten()
.filter(|(_, tok, _)| matches!(tok, Tok::Name { .. }))
.map(|(start, _, end)| Range {
@@ -951,7 +955,7 @@ pub fn excepthandler_name_range(handler: &Excepthandler, locator: &Locator) -> O
(Some(_), Some(type_)) => {
let type_end_location = type_.end_location.unwrap();
let contents = locator.slice(&Range::new(type_end_location, body[0].location));
let range = lexer::make_tokenizer_located(contents, type_end_location)
let range = lexer::make_tokenizer_located(contents, Mode::Module, type_end_location)
.flatten()
.tuple_windows()
.find(|(tok, next_tok)| {
@@ -978,7 +982,7 @@ pub fn except_range(handler: &Excepthandler, locator: &Locator) -> Range {
location: handler.location,
end_location: end,
});
let range = lexer::make_tokenizer_located(contents, handler.location)
let range = lexer::make_tokenizer_located(contents, Mode::Module, handler.location)
.flatten()
.find(|(_, kind, _)| matches!(kind, Tok::Except { .. }))
.map(|(location, _, end_location)| Range {
@@ -992,7 +996,7 @@ pub fn except_range(handler: &Excepthandler, locator: &Locator) -> Range {
/// Find f-strings that don't contain any formatted values in a `JoinedStr`.
pub fn find_useless_f_strings(expr: &Expr, locator: &Locator) -> Vec<(Range, Range)> {
let contents = locator.slice(&Range::from_located(expr));
lexer::make_tokenizer_located(contents, expr.location)
lexer::make_tokenizer_located(contents, Mode::Module, expr.location)
.flatten()
.filter_map(|(location, tok, end_location)| match tok {
Tok::String {
@@ -1046,7 +1050,7 @@ pub fn else_range(stmt: &Stmt, locator: &Locator) -> Option<Range> {
.expect("Expected orelse to be non-empty")
.location,
});
let range = lexer::make_tokenizer_located(contents, body_end)
let range = lexer::make_tokenizer_located(contents, Mode::Module, body_end)
.flatten()
.find(|(_, kind, _)| matches!(kind, Tok::Else))
.map(|(location, _, end_location)| Range {
@@ -1062,7 +1066,7 @@ pub fn else_range(stmt: &Stmt, locator: &Locator) -> Option<Range> {
/// Return the `Range` of the first `Tok::Colon` token in a `Range`.
pub fn first_colon_range(range: Range, locator: &Locator) -> Option<Range> {
let contents = locator.slice(&range);
let range = lexer::make_tokenizer_located(contents, range.location)
let range = lexer::make_tokenizer_located(contents, Mode::Module, range.location)
.flatten()
.find(|(_, kind, _)| matches!(kind, Tok::Colon))
.map(|(location, _, end_location)| Range {
@@ -1092,7 +1096,7 @@ pub fn elif_else_range(stmt: &Stmt, locator: &Locator) -> Option<Range> {
_ => return None,
};
let contents = locator.slice(&Range::new(start, end));
let range = lexer::make_tokenizer_located(contents, start)
let range = lexer::make_tokenizer_located(contents, Mode::Module, start)
.flatten()
.find(|(_, kind, _)| matches!(kind, Tok::Elif | Tok::Else))
.map(|(location, _, end_location)| Range {
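Every helper in this file follows the same pattern: lex a source slice with `make_tokenizer_located`, passing the slice's absolute start `Location` so token positions come back in file coordinates, then scan for a particular token. A stripped-down sketch of that pattern without Ruff's `Locator`/`Range` wrappers (`first_colon` is illustrative, not the crate's `first_colon_range`):

```rust
use rustpython_parser::ast::Location;
use rustpython_parser::lexer;
use rustpython_parser::lexer::Tok;
use rustpython_parser::mode::Mode;

/// Return the start and end of the first `:` token in `contents`, where
/// `start` is the absolute location of `contents` within the file, so the
/// returned locations are already file-relative.
fn first_colon(contents: &str, start: Location) -> Option<(Location, Location)> {
    lexer::make_tokenizer_located(contents, Mode::Module, start)
        .flatten()
        .find(|(_, tok, _)| matches!(tok, Tok::Colon))
        .map(|(location, _, end_location)| (location, end_location))
}
```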
5 changes: 4 additions & 1 deletion crates/ruff/src/ast/operations.rs
@@ -3,6 +3,7 @@ use rustc_hash::FxHashMap;
use rustpython_parser::ast::{Cmpop, Constant, Expr, ExprKind, Located, Stmt, StmtKind};
use rustpython_parser::lexer;
use rustpython_parser::lexer::Tok;
use rustpython_parser::mode::Mode;

use crate::ast::helpers::any_over_expr;
use crate::ast::types::{BindingKind, Scope};
@@ -283,7 +284,9 @@ pub type LocatedCmpop<U = ()> = Located<Cmpop, U>;
/// `CPython` doesn't either. This method iterates over the token stream and
/// re-identifies [`Cmpop`] nodes, annotating them with valid ranges.
pub fn locate_cmpops(contents: &str) -> Vec<LocatedCmpop> {
let mut tok_iter = lexer::make_tokenizer(contents).flatten().peekable();
let mut tok_iter = lexer::make_tokenizer(contents, Mode::Module)
.flatten()
.peekable();
let mut ops: Vec<LocatedCmpop> = vec![];
let mut count: usize = 0;
loop {
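`locate_cmpops` re-derives source ranges for comparison operators by walking the token stream, since the AST nodes don't carry them. A simplified sketch of the idea using the updated tokenizer call, covering only single-token operators (Ruff's real implementation also pairs up the two-token `is not` / `not in` forms; the token and `Cmpop` variant names here are assumed from rustpython-parser's conventions):

```rust
use rustpython_parser::ast::{Cmpop, Location};
use rustpython_parser::lexer;
use rustpython_parser::lexer::Tok;
use rustpython_parser::mode::Mode;

/// Illustrative only: map each single-token comparison operator in `contents`
/// to its `Cmpop`, along with the token's start and end locations.
fn simple_cmpops(contents: &str) -> Vec<(Location, Cmpop, Location)> {
    lexer::make_tokenizer(contents, Mode::Module)
        .flatten()
        .filter_map(|(start, tok, end)| {
            let op = match tok {
                Tok::EqEqual => Cmpop::Eq,
                Tok::NotEqual => Cmpop::NotEq,
                Tok::Less => Cmpop::Lt,
                Tok::LessEqual => Cmpop::LtE,
                Tok::Greater => Cmpop::Gt,
                Tok::GreaterEqual => Cmpop::GtE,
                _ => return None,
            };
            Some((start, op, end))
        })
        .collect()
}
```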
13 changes: 10 additions & 3 deletions crates/ruff/src/autofix/helpers.rs
@@ -6,6 +6,7 @@ use libcst_native::{
use rustpython_parser::ast::{ExcepthandlerKind, Expr, Keyword, Location, Stmt, StmtKind};
use rustpython_parser::lexer;
use rustpython_parser::lexer::Tok;
use rustpython_parser::mode::Mode;

use crate::ast::helpers;
use crate::ast::helpers::to_absolute;
@@ -371,7 +372,9 @@ pub fn remove_argument(
if n_arguments == 1 {
// Case 1: there is only one argument.
let mut count: usize = 0;
for (start, tok, end) in lexer::make_tokenizer_located(contents, stmt_at).flatten() {
for (start, tok, end) in
lexer::make_tokenizer_located(contents, Mode::Module, stmt_at).flatten()
{
if matches!(tok, Tok::Lpar) {
if count == 0 {
fix_start = Some(if remove_parentheses {
@@ -403,7 +406,9 @@
{
// Case 2: argument or keyword is _not_ the last node.
let mut seen_comma = false;
for (start, tok, end) in lexer::make_tokenizer_located(contents, stmt_at).flatten() {
for (start, tok, end) in
lexer::make_tokenizer_located(contents, Mode::Module, stmt_at).flatten()
{
if seen_comma {
if matches!(tok, Tok::NonLogicalNewline) {
// Also delete any non-logical newlines after the comma.
@@ -426,7 +431,9 @@
} else {
// Case 3: argument or keyword is the last node, so we have to find the last
// comma in the stmt.
for (start, tok, _) in lexer::make_tokenizer_located(contents, stmt_at).flatten() {
for (start, tok, _) in
lexer::make_tokenizer_located(contents, Mode::Module, stmt_at).flatten()
{
if start == expr_at {
fix_end = Some(expr_end);
break;
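All three `remove_argument` cases above scan the raw tokens of the statement from `stmt_at`. The single-argument case, for example, tracks parenthesis depth to find the call's outer parentheses; a self-contained sketch of that scan (simplified, and ignoring the `remove_parentheses` option shown in the diff):

```rust
use rustpython_parser::ast::Location;
use rustpython_parser::lexer;
use rustpython_parser::lexer::Tok;
use rustpython_parser::mode::Mode;

/// Locate the outermost `(`/`)` pair in `contents`, lexing from `stmt_at` so
/// the returned locations are absolute: the start of the opening parenthesis
/// and the end of its matching closing parenthesis.
fn outer_parens(contents: &str, stmt_at: Location) -> Option<(Location, Location)> {
    let mut open: Option<Location> = None;
    let mut depth: usize = 0;
    for (start, tok, end) in
        lexer::make_tokenizer_located(contents, Mode::Module, stmt_at).flatten()
    {
        if matches!(tok, Tok::Lpar) {
            if depth == 0 {
                open = Some(start);
            }
            depth += 1;
        } else if matches!(tok, Tok::Rpar) && depth > 0 {
            depth -= 1;
            if depth == 0 {
                return open.map(|open| (open, end));
            }
        }
    }
    None
}
```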
11 changes: 6 additions & 5 deletions crates/ruff/src/checkers/logical_lines.rs
@@ -154,6 +154,7 @@ pub fn check_logical_lines(
mod tests {
use rustpython_parser::lexer;
use rustpython_parser::lexer::LexResult;
use rustpython_parser::mode::Mode;

use crate::checkers::logical_lines::iter_logical_lines;
use crate::source_code::Locator;
@@ -164,7 +165,7 @@
x = 1
y = 2
z = x + 1"#;
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
.into_iter()
@@ -185,7 +186,7 @@
]
y = 2
z = x + 1"#;
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
.into_iter()
@@ -199,7 +200,7 @@
assert_eq!(actual, expected);

let contents = "x = 'abc'";
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
.into_iter()
@@ -212,7 +213,7 @@
def f():
x = 1
f()"#;
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
.into_iter()
@@ -227,7 +228,7 @@
# Comment goes here.
x = 1
f()"#;
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents).collect();
let lxr: Vec<LexResult> = lexer::make_tokenizer(contents, Mode::Module).collect();
let locator = Locator::new(contents);
let actual: Vec<String> = iter_logical_lines(&lxr, &locator)
.into_iter()