From 1efa2e07ad1303d39223e6f9b359e842c5563714 Mon Sep 17 00:00:00 2001
From: Charlie Marsh
Date: Wed, 22 Feb 2023 10:44:45 -0500
Subject: [PATCH] Avoid match statement misidentification in token rules
 (#3129)

---
 Cargo.lock | 8 ++---
 Cargo.toml | 4 +--
 .../test/fixtures/pycodestyle/E70.py | 3 ++
 crates/ruff/src/ast/helpers.rs | 24 ++++++++-------
 crates/ruff/src/ast/operations.rs | 5 +++-
 crates/ruff/src/autofix/helpers.rs | 13 +++++++--
 crates/ruff/src/checkers/logical_lines.rs | 11 +++----
 crates/ruff/src/directives.rs | 29 +++++++++++++------
 .../src/rules/flake8_annotations/fixes.rs | 5 +++-
 crates/ruff/src/rules/isort/comments.rs | 3 +-
 crates/ruff/src/rules/isort/helpers.rs | 13 ++++++---
 crates/ruff/src/rules/pyflakes/fixes.rs | 6 ++--
 .../rules/pyflakes/rules/unused_variable.rs | 12 +++++---
 .../pylint/rules/bad_string_format_type.rs | 5 +++-
 crates/ruff/src/rules/pyupgrade/fixes.rs | 3 +-
 .../src/rules/pyupgrade/rules/f_strings.rs | 6 ++--
 .../rules/pyupgrade/rules/native_literals.rs | 3 +-
 .../pyupgrade/rules/outdated_version_block.rs | 6 ++--
 .../rules/printf_string_formatting.rs | 9 ++++--
 .../pyupgrade/rules/redundant_open_modes.rs | 5 +++-
 crates/ruff/src/rustpython_helpers.rs | 2 +-
 crates/ruff/src/source_code/indexer.rs | 9 +++---
 crates/ruff/src/source_code/stylist.rs | 5 ++--
 crates/ruff_dev/src/print_tokens.rs | 3 +-
 .../src/core/rustpython_helpers.rs | 2 +-
 25 files changed, 128 insertions(+), 66 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 7c401aa7c88a4..8715efe410841 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2150,7 +2150,7 @@ dependencies = [
 [[package]]
 name = "rustpython-ast"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/RustPython.git?rev=ddf497623ae56d21aa4166ff1c0725a7db67e955#ddf497623ae56d21aa4166ff1c0725a7db67e955"
+source = "git+https://github.com/RustPython/RustPython.git?rev=6d5bbd913c7c46518f4ed8b1b378ffb0df72f505#6d5bbd913c7c46518f4ed8b1b378ffb0df72f505"
 dependencies = [
  "num-bigint",
  "rustpython-compiler-core",
@@ -2159,7 +2159,7 @@ dependencies = [
 [[package]]
 name = "rustpython-common"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/RustPython.git?rev=ddf497623ae56d21aa4166ff1c0725a7db67e955#ddf497623ae56d21aa4166ff1c0725a7db67e955"
+source = "git+https://github.com/RustPython/RustPython.git?rev=6d5bbd913c7c46518f4ed8b1b378ffb0df72f505#6d5bbd913c7c46518f4ed8b1b378ffb0df72f505"
 dependencies = [
  "ascii",
  "bitflags",
@@ -2184,7 +2184,7 @@ dependencies = [
 [[package]]
 name = "rustpython-compiler-core"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/RustPython.git?rev=ddf497623ae56d21aa4166ff1c0725a7db67e955#ddf497623ae56d21aa4166ff1c0725a7db67e955"
+source = "git+https://github.com/RustPython/RustPython.git?rev=6d5bbd913c7c46518f4ed8b1b378ffb0df72f505#6d5bbd913c7c46518f4ed8b1b378ffb0df72f505"
 dependencies = [
  "bincode",
  "bitflags",
@@ -2201,7 +2201,7 @@ dependencies = [
 [[package]]
 name = "rustpython-parser"
 version = "0.2.0"
-source = "git+https://github.com/RustPython/RustPython.git?rev=ddf497623ae56d21aa4166ff1c0725a7db67e955#ddf497623ae56d21aa4166ff1c0725a7db67e955"
+source = "git+https://github.com/RustPython/RustPython.git?rev=6d5bbd913c7c46518f4ed8b1b378ffb0df72f505#6d5bbd913c7c46518f4ed8b1b378ffb0df72f505"
 dependencies = [
  "ahash",
  "anyhow",
diff --git a/Cargo.toml b/Cargo.toml
index 9b948aed93dea..f43216f4682aa 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -13,8 +13,8 @@ libcst = { git = "https://github.com/charliermarsh/LibCST", rev = "f2f0b7a487a87
 once_cell = { version = "1.16.0" }
regex = { version = "1.6.0" } rustc-hash = { version = "1.1.0" } -rustpython-common = { git = "https://github.com/RustPython/RustPython.git", rev = "ddf497623ae56d21aa4166ff1c0725a7db67e955" } -rustpython-parser = { features = ["lalrpop"], git = "https://github.com/RustPython/RustPython.git", rev = "ddf497623ae56d21aa4166ff1c0725a7db67e955" } +rustpython-common = { git = "https://github.com/RustPython/RustPython.git", rev = "6d5bbd913c7c46518f4ed8b1b378ffb0df72f505" } +rustpython-parser = { features = ["lalrpop"], git = "https://github.com/RustPython/RustPython.git", rev = "6d5bbd913c7c46518f4ed8b1b378ffb0df72f505" } schemars = { version = "0.8.11" } serde = { version = "1.0.147", features = ["derive"] } serde_json = { version = "1.0.87" } diff --git a/crates/ruff/resources/test/fixtures/pycodestyle/E70.py b/crates/ruff/resources/test/fixtures/pycodestyle/E70.py index 7b3d9afb586aa..bfbec79124b8e 100644 --- a/crates/ruff/resources/test/fixtures/pycodestyle/E70.py +++ b/crates/ruff/resources/test/fixtures/pycodestyle/E70.py @@ -57,3 +57,6 @@ class C: ...; ... #: E701:2:12 match *0, 1, *2: case 0,: y = 0 +#: +class Foo: + match: Optional[Match] = None diff --git a/crates/ruff/src/ast/helpers.rs b/crates/ruff/src/ast/helpers.rs index 5631451623d0b..8b72d6fe395db 100644 --- a/crates/ruff/src/ast/helpers.rs +++ b/crates/ruff/src/ast/helpers.rs @@ -11,6 +11,7 @@ use rustpython_parser::ast::{ }; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; use rustpython_parser::token::StringKind; use smallvec::{smallvec, SmallVec}; @@ -655,7 +656,7 @@ pub fn has_comments(located: &Located, locator: &Locator) -> bool { /// Returns `true` if a [`Range`] includes at least one comment. pub fn has_comments_in(range: Range, locator: &Locator) -> bool { - for tok in lexer::make_tokenizer(locator.slice(&range)) { + for tok in lexer::make_tokenizer_located(locator.slice(&range), Mode::Module, range.location) { match tok { Ok((_, tok, _)) => { if matches!(tok, Tok::Comment(..)) { @@ -870,7 +871,8 @@ pub fn match_parens(start: Location, locator: &Locator) -> Option { let mut fix_start = None; let mut fix_end = None; let mut count: usize = 0; - for (start, tok, end) in lexer::make_tokenizer_located(contents, start).flatten() { + for (start, tok, end) in lexer::make_tokenizer_located(contents, Mode::Module, start).flatten() + { if matches!(tok, Tok::Lpar) { if count == 0 { fix_start = Some(start); @@ -902,7 +904,9 @@ pub fn identifier_range(stmt: &Stmt, locator: &Locator) -> Range { | StmtKind::AsyncFunctionDef { .. } ) { let contents = locator.slice(&Range::from_located(stmt)); - for (start, tok, end) in lexer::make_tokenizer_located(contents, stmt.location).flatten() { + for (start, tok, end) in + lexer::make_tokenizer_located(contents, Mode::Module, stmt.location).flatten() + { if matches!(tok, Tok::Name { .. }) { return Range::new(start, end); } @@ -933,7 +937,7 @@ pub fn find_names<'a, T, U>( locator: &'a Locator, ) -> impl Iterator + 'a { let contents = locator.slice(&Range::from_located(located)); - lexer::make_tokenizer_located(contents, located.location) + lexer::make_tokenizer_located(contents, Mode::Module, located.location) .flatten() .filter(|(_, tok, _)| matches!(tok, Tok::Name { .. 
})) .map(|(start, _, end)| Range { @@ -951,7 +955,7 @@ pub fn excepthandler_name_range(handler: &Excepthandler, locator: &Locator) -> O (Some(_), Some(type_)) => { let type_end_location = type_.end_location.unwrap(); let contents = locator.slice(&Range::new(type_end_location, body[0].location)); - let range = lexer::make_tokenizer_located(contents, type_end_location) + let range = lexer::make_tokenizer_located(contents, Mode::Module, type_end_location) .flatten() .tuple_windows() .find(|(tok, next_tok)| { @@ -978,7 +982,7 @@ pub fn except_range(handler: &Excepthandler, locator: &Locator) -> Range { location: handler.location, end_location: end, }); - let range = lexer::make_tokenizer_located(contents, handler.location) + let range = lexer::make_tokenizer_located(contents, Mode::Module, handler.location) .flatten() .find(|(_, kind, _)| matches!(kind, Tok::Except { .. })) .map(|(location, _, end_location)| Range { @@ -992,7 +996,7 @@ pub fn except_range(handler: &Excepthandler, locator: &Locator) -> Range { /// Find f-strings that don't contain any formatted values in a `JoinedStr`. pub fn find_useless_f_strings(expr: &Expr, locator: &Locator) -> Vec<(Range, Range)> { let contents = locator.slice(&Range::from_located(expr)); - lexer::make_tokenizer_located(contents, expr.location) + lexer::make_tokenizer_located(contents, Mode::Module, expr.location) .flatten() .filter_map(|(location, tok, end_location)| match tok { Tok::String { @@ -1046,7 +1050,7 @@ pub fn else_range(stmt: &Stmt, locator: &Locator) -> Option { .expect("Expected orelse to be non-empty") .location, }); - let range = lexer::make_tokenizer_located(contents, body_end) + let range = lexer::make_tokenizer_located(contents, Mode::Module, body_end) .flatten() .find(|(_, kind, _)| matches!(kind, Tok::Else)) .map(|(location, _, end_location)| Range { @@ -1062,7 +1066,7 @@ pub fn else_range(stmt: &Stmt, locator: &Locator) -> Option { /// Return the `Range` of the first `Tok::Colon` token in a `Range`. pub fn first_colon_range(range: Range, locator: &Locator) -> Option { let contents = locator.slice(&range); - let range = lexer::make_tokenizer_located(contents, range.location) + let range = lexer::make_tokenizer_located(contents, Mode::Module, range.location) .flatten() .find(|(_, kind, _)| matches!(kind, Tok::Colon)) .map(|(location, _, end_location)| Range { @@ -1092,7 +1096,7 @@ pub fn elif_else_range(stmt: &Stmt, locator: &Locator) -> Option { _ => return None, }; let contents = locator.slice(&Range::new(start, end)); - let range = lexer::make_tokenizer_located(contents, start) + let range = lexer::make_tokenizer_located(contents, Mode::Module, start) .flatten() .find(|(_, kind, _)| matches!(kind, Tok::Elif | Tok::Else)) .map(|(location, _, end_location)| Range { diff --git a/crates/ruff/src/ast/operations.rs b/crates/ruff/src/ast/operations.rs index b999563f6eb0c..2b0e8eed0c502 100644 --- a/crates/ruff/src/ast/operations.rs +++ b/crates/ruff/src/ast/operations.rs @@ -3,6 +3,7 @@ use rustc_hash::FxHashMap; use rustpython_parser::ast::{Cmpop, Constant, Expr, ExprKind, Located, Stmt, StmtKind}; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; use crate::ast::helpers::any_over_expr; use crate::ast::types::{BindingKind, Scope}; @@ -283,7 +284,9 @@ pub type LocatedCmpop = Located; /// `CPython` doesn't either. This method iterates over the token stream and /// re-identifies [`Cmpop`] nodes, annotating them with valid ranges. 
pub fn locate_cmpops(contents: &str) -> Vec { - let mut tok_iter = lexer::make_tokenizer(contents).flatten().peekable(); + let mut tok_iter = lexer::make_tokenizer(contents, Mode::Module) + .flatten() + .peekable(); let mut ops: Vec = vec![]; let mut count: usize = 0; loop { diff --git a/crates/ruff/src/autofix/helpers.rs b/crates/ruff/src/autofix/helpers.rs index ee00b6d44c67a..33bdf33b7b11d 100644 --- a/crates/ruff/src/autofix/helpers.rs +++ b/crates/ruff/src/autofix/helpers.rs @@ -6,6 +6,7 @@ use libcst_native::{ use rustpython_parser::ast::{ExcepthandlerKind, Expr, Keyword, Location, Stmt, StmtKind}; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; use crate::ast::helpers; use crate::ast::helpers::to_absolute; @@ -371,7 +372,9 @@ pub fn remove_argument( if n_arguments == 1 { // Case 1: there is only one argument. let mut count: usize = 0; - for (start, tok, end) in lexer::make_tokenizer_located(contents, stmt_at).flatten() { + for (start, tok, end) in + lexer::make_tokenizer_located(contents, Mode::Module, stmt_at).flatten() + { if matches!(tok, Tok::Lpar) { if count == 0 { fix_start = Some(if remove_parentheses { @@ -403,7 +406,9 @@ pub fn remove_argument( { // Case 2: argument or keyword is _not_ the last node. let mut seen_comma = false; - for (start, tok, end) in lexer::make_tokenizer_located(contents, stmt_at).flatten() { + for (start, tok, end) in + lexer::make_tokenizer_located(contents, Mode::Module, stmt_at).flatten() + { if seen_comma { if matches!(tok, Tok::NonLogicalNewline) { // Also delete any non-logical newlines after the comma. @@ -426,7 +431,9 @@ pub fn remove_argument( } else { // Case 3: argument or keyword is the last node, so we have to find the last // comma in the stmt. 
- for (start, tok, _) in lexer::make_tokenizer_located(contents, stmt_at).flatten() { + for (start, tok, _) in + lexer::make_tokenizer_located(contents, Mode::Module, stmt_at).flatten() + { if start == expr_at { fix_end = Some(expr_end); break; diff --git a/crates/ruff/src/checkers/logical_lines.rs b/crates/ruff/src/checkers/logical_lines.rs index 62bd85d5670c8..e2702b7f9e39d 100644 --- a/crates/ruff/src/checkers/logical_lines.rs +++ b/crates/ruff/src/checkers/logical_lines.rs @@ -154,6 +154,7 @@ pub fn check_logical_lines( mod tests { use rustpython_parser::lexer; use rustpython_parser::lexer::LexResult; + use rustpython_parser::mode::Mode; use crate::checkers::logical_lines::iter_logical_lines; use crate::source_code::Locator; @@ -164,7 +165,7 @@ mod tests { x = 1 y = 2 z = x + 1"#; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); let locator = Locator::new(contents); let actual: Vec = iter_logical_lines(&lxr, &locator) .into_iter() @@ -185,7 +186,7 @@ x = [ ] y = 2 z = x + 1"#; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); let locator = Locator::new(contents); let actual: Vec = iter_logical_lines(&lxr, &locator) .into_iter() @@ -199,7 +200,7 @@ z = x + 1"#; assert_eq!(actual, expected); let contents = "x = 'abc'"; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); let locator = Locator::new(contents); let actual: Vec = iter_logical_lines(&lxr, &locator) .into_iter() @@ -212,7 +213,7 @@ z = x + 1"#; def f(): x = 1 f()"#; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); let locator = Locator::new(contents); let actual: Vec = iter_logical_lines(&lxr, &locator) .into_iter() @@ -227,7 +228,7 @@ def f(): # Comment goes here. 
x = 1 f()"#; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); let locator = Locator::new(contents); let actual: Vec = iter_logical_lines(&lxr, &locator) .into_iter() diff --git a/crates/ruff/src/directives.rs b/crates/ruff/src/directives.rs index 888a64cf79840..094074e478cde 100644 --- a/crates/ruff/src/directives.rs +++ b/crates/ruff/src/directives.rs @@ -152,6 +152,7 @@ mod tests { use nohash_hasher::{IntMap, IntSet}; use rustpython_parser::lexer; use rustpython_parser::lexer::LexResult; + use rustpython_parser::mode::Mode; use crate::directives::{extract_isort_directives, extract_noqa_line_for}; @@ -161,6 +162,7 @@ mod tests { "x = 1 y = 2 z = x + 1", + Mode::Module, ) .collect(); assert_eq!(extract_noqa_line_for(&lxr), IntMap::default()); @@ -170,6 +172,7 @@ z = x + 1", x = 1 y = 2 z = x + 1", + Mode::Module, ) .collect(); assert_eq!(extract_noqa_line_for(&lxr), IntMap::default()); @@ -179,6 +182,7 @@ z = x + 1", y = 2 z = x + 1 ", + Mode::Module, ) .collect(); assert_eq!(extract_noqa_line_for(&lxr), IntMap::default()); @@ -189,6 +193,7 @@ z = x + 1 y = 2 z = x + 1 ", + Mode::Module, ) .collect(); assert_eq!(extract_noqa_line_for(&lxr), IntMap::default()); @@ -200,6 +205,7 @@ ghi ''' y = 2 z = x + 1", + Mode::Module, ) .collect(); assert_eq!( @@ -214,6 +220,7 @@ def ghi ''' z = 2", + Mode::Module, ) .collect(); assert_eq!( @@ -227,6 +234,7 @@ y = '''abc def ghi '''", + Mode::Module, ) .collect(); assert_eq!( @@ -237,6 +245,7 @@ ghi let lxr: Vec = lexer::make_tokenizer( r#"x = \ 1"#, + Mode::Module, ) .collect(); assert_eq!(extract_noqa_line_for(&lxr), IntMap::from_iter([(1, 2)])); @@ -245,6 +254,7 @@ ghi r#"from foo import \ bar as baz, \ qux as quux"#, + Mode::Module, ) .collect(); assert_eq!( @@ -262,6 +272,7 @@ x = \ 1 y = \ 2"#, + Mode::Module, ) .collect(); assert_eq!( @@ -275,7 +286,7 @@ y = \ let contents = "x = 1 y = 2 z = x + 1"; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); assert_eq!(extract_isort_directives(&lxr).exclusions, IntSet::default()); let contents = "# isort: off @@ -283,7 +294,7 @@ x = 1 y = 2 # isort: on z = x + 1"; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); assert_eq!( extract_isort_directives(&lxr).exclusions, IntSet::from_iter([2, 3, 4]) @@ -296,7 +307,7 @@ y = 2 # isort: on z = x + 1 # isort: on"; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); assert_eq!( extract_isort_directives(&lxr).exclusions, IntSet::from_iter([2, 3, 4, 5]) @@ -306,7 +317,7 @@ z = x + 1 x = 1 y = 2 z = x + 1"; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); assert_eq!( extract_isort_directives(&lxr).exclusions, IntSet::from_iter([2, 3, 4]) @@ -316,7 +327,7 @@ z = x + 1"; x = 1 y = 2 z = x + 1"; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); assert_eq!(extract_isort_directives(&lxr).exclusions, IntSet::default()); let contents = "# isort: off @@ -325,7 +336,7 @@ x = 1 y = 2 # isort: skip_file z = x + 1"; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); 
assert_eq!(extract_isort_directives(&lxr).exclusions, IntSet::default()); } @@ -334,20 +345,20 @@ z = x + 1"; let contents = "x = 1 y = 2 z = x + 1"; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); assert_eq!(extract_isort_directives(&lxr).splits, Vec::::new()); let contents = "x = 1 y = 2 # isort: split z = x + 1"; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); assert_eq!(extract_isort_directives(&lxr).splits, vec![3]); let contents = "x = 1 y = 2 # isort: split z = x + 1"; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); assert_eq!(extract_isort_directives(&lxr).splits, vec![2]); } } diff --git a/crates/ruff/src/rules/flake8_annotations/fixes.rs b/crates/ruff/src/rules/flake8_annotations/fixes.rs index 81405d644ac61..f3a24425a1659 100644 --- a/crates/ruff/src/rules/flake8_annotations/fixes.rs +++ b/crates/ruff/src/rules/flake8_annotations/fixes.rs @@ -2,6 +2,7 @@ use anyhow::{bail, Result}; use rustpython_parser::ast::Stmt; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; use crate::ast::types::Range; use crate::fix::Fix; @@ -16,7 +17,9 @@ pub fn add_return_none_annotation(locator: &Locator, stmt: &Stmt) -> Result let mut seen_lpar = false; let mut seen_rpar = false; let mut count: usize = 0; - for (start, tok, ..) in lexer::make_tokenizer_located(contents, range.location).flatten() { + for (start, tok, ..) in + lexer::make_tokenizer_located(contents, Mode::Module, range.location).flatten() + { if seen_lpar && seen_rpar { if matches!(tok, Tok::Colon) { return Ok(Fix::insertion(" -> None".to_string(), start)); diff --git a/crates/ruff/src/rules/isort/comments.rs b/crates/ruff/src/rules/isort/comments.rs index 78e13d0539714..01f4cf98ac3c8 100644 --- a/crates/ruff/src/rules/isort/comments.rs +++ b/crates/ruff/src/rules/isort/comments.rs @@ -3,6 +3,7 @@ use std::borrow::Cow; use rustpython_parser::ast::Location; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; use crate::ast::types::Range; use crate::source_code::Locator; @@ -17,7 +18,7 @@ pub struct Comment<'a> { /// Collect all comments in an import block. pub fn collect_comments<'a>(range: &Range, locator: &'a Locator) -> Vec> { let contents = locator.slice(range); - lexer::make_tokenizer_located(contents, range.location) + lexer::make_tokenizer_located(contents, Mode::Module, range.location) .flatten() .filter_map(|(start, tok, end)| { if let Tok::Comment(value) = tok { diff --git a/crates/ruff/src/rules/isort/helpers.rs b/crates/ruff/src/rules/isort/helpers.rs index 671253fe756f3..c0ad42a2e8d07 100644 --- a/crates/ruff/src/rules/isort/helpers.rs +++ b/crates/ruff/src/rules/isort/helpers.rs @@ -1,19 +1,23 @@ use rustpython_parser::ast::{Location, Stmt}; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; -use super::types::TrailingComma; use crate::ast::helpers::is_docstring_stmt; use crate::ast::types::Range; use crate::source_code::Locator; +use super::types::TrailingComma; + /// Return `true` if a `StmtKind::ImportFrom` statement ends with a magic /// trailing comma. 
pub fn trailing_comma(stmt: &Stmt, locator: &Locator) -> TrailingComma { let contents = locator.slice(&Range::from_located(stmt)); let mut count: usize = 0; let mut trailing_comma = TrailingComma::Absent; - for (_, tok, _) in lexer::make_tokenizer(contents).flatten() { + for (_, tok, _) in + lexer::make_tokenizer_located(contents, Mode::Module, stmt.location).flatten() + { if matches!(tok, Tok::Lpar) { count += 1; } @@ -110,7 +114,7 @@ pub fn find_splice_location(body: &[Stmt], locator: &Locator) -> Location { // Find the first token that isn't a comment or whitespace. let contents = locator.skip(splice); - for (.., tok, end) in lexer::make_tokenizer_located(contents, splice).flatten() { + for (.., tok, end) in lexer::make_tokenizer_located(contents, Mode::Module, splice).flatten() { if matches!(tok, Tok::Comment(..) | Tok::Newline) { splice = end; } else { @@ -127,9 +131,10 @@ mod tests { use rustpython_parser::ast::Location; use rustpython_parser::parser; - use super::find_splice_location; use crate::source_code::Locator; + use super::find_splice_location; + fn splice_contents(contents: &str) -> Result { let program = parser::parse_program(contents, "")?; let locator = Locator::new(contents); diff --git a/crates/ruff/src/rules/pyflakes/fixes.rs b/crates/ruff/src/rules/pyflakes/fixes.rs index 24328a63da8c3..b6c47ca2f6761 100644 --- a/crates/ruff/src/rules/pyflakes/fixes.rs +++ b/crates/ruff/src/rules/pyflakes/fixes.rs @@ -1,9 +1,11 @@ use anyhow::{bail, Result}; use libcst_native::{Call, Codegen, CodegenState, Dict, DictElement, Expression}; -use ruff_python::string::strip_quotes_and_prefixes; use rustpython_parser::ast::{Excepthandler, Expr}; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; + +use ruff_python::string::strip_quotes_and_prefixes; use crate::ast::types::Range; use crate::cst::matchers::{match_expr, match_module}; @@ -122,7 +124,7 @@ pub fn remove_exception_handler_assignment( // End of the token just before the `as` to the semicolon. 
let mut prev = None; for (start, tok, end) in - lexer::make_tokenizer_located(contents, excepthandler.location).flatten() + lexer::make_tokenizer_located(contents, Mode::Module, excepthandler.location).flatten() { if matches!(tok, Tok::As) { fix_start = prev; diff --git a/crates/ruff/src/rules/pyflakes/rules/unused_variable.rs b/crates/ruff/src/rules/pyflakes/rules/unused_variable.rs index 03438cbf8d6a0..d090b5e23b62f 100644 --- a/crates/ruff/src/rules/pyflakes/rules/unused_variable.rs +++ b/crates/ruff/src/rules/pyflakes/rules/unused_variable.rs @@ -3,6 +3,7 @@ use log::error; use rustpython_parser::ast::{ExprKind, Located, Stmt, StmtKind}; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; use ruff_macros::{define_violation, derive_message_formats}; @@ -74,9 +75,10 @@ where let mut sqb_count = 0; let mut brace_count = 0; - for ((_, tok, _), (start, _, end)) in lexer::make_tokenizer_located(contents, located.location) - .flatten() - .tuple_windows() + for ((_, tok, _), (start, _, end)) in + lexer::make_tokenizer_located(contents, Mode::Module, located.location) + .flatten() + .tuple_windows() { match tok { Tok::Lpar => { @@ -136,7 +138,9 @@ where let mut sqb_count = 0; let mut brace_count = 0; - for (start, tok, end) in lexer::make_tokenizer_located(contents, located.location).flatten() { + for (start, tok, end) in + lexer::make_tokenizer_located(contents, Mode::Module, located.location).flatten() + { match tok { Tok::Lpar => { par_count += 1; diff --git a/crates/ruff/src/rules/pylint/rules/bad_string_format_type.rs b/crates/ruff/src/rules/pylint/rules/bad_string_format_type.rs index 24bf8a11b673a..953ae3773bf82 100644 --- a/crates/ruff/src/rules/pylint/rules/bad_string_format_type.rs +++ b/crates/ruff/src/rules/pylint/rules/bad_string_format_type.rs @@ -5,6 +5,7 @@ use rustpython_common::cformat::{CFormatPart, CFormatSpec, CFormatStrOrBytes, CF use rustpython_parser::ast::{Constant, Expr, ExprKind, Location, Operator}; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; use ruff_macros::{define_violation, derive_message_formats}; @@ -247,7 +248,9 @@ pub fn bad_string_format_type(checker: &mut Checker, expr: &Expr, right: &Expr) // Grab each string segment (in case there's an implicit concatenation). let content = checker.locator.slice(&Range::from_located(expr)); let mut strings: Vec<(Location, Location)> = vec![]; - for (start, tok, end) in lexer::make_tokenizer_located(content, expr.location).flatten() { + for (start, tok, end) in + lexer::make_tokenizer_located(content, Mode::Module, expr.location).flatten() + { if matches!(tok, Tok::String { .. }) { strings.push((start, end)); } else if matches!(tok, Tok::Percent) { diff --git a/crates/ruff/src/rules/pyupgrade/fixes.rs b/crates/ruff/src/rules/pyupgrade/fixes.rs index 00e471bc7741b..8f06ff35c2393 100644 --- a/crates/ruff/src/rules/pyupgrade/fixes.rs +++ b/crates/ruff/src/rules/pyupgrade/fixes.rs @@ -6,6 +6,7 @@ use libcst_native::{ use rustpython_parser::ast::{Expr, Keyword, Location}; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; use crate::ast::types::Range; use crate::autofix::helpers::remove_argument; @@ -110,7 +111,7 @@ pub fn remove_import_members(contents: &str, members: &[&str]) -> String { // Find all Tok::Name tokens that are not preceded by Tok::As, and all // Tok::Comma tokens. 
let mut prev_tok = None; - for (start, tok, end) in lexer::make_tokenizer(contents) + for (start, tok, end) in lexer::make_tokenizer(contents, Mode::Module) .flatten() .skip_while(|(_, tok, _)| !matches!(tok, Tok::Import)) { diff --git a/crates/ruff/src/rules/pyupgrade/rules/f_strings.rs b/crates/ruff/src/rules/pyupgrade/rules/f_strings.rs index a5859ac87af7b..4486ebb0db524 100644 --- a/crates/ruff/src/rules/pyupgrade/rules/f_strings.rs +++ b/crates/ruff/src/rules/pyupgrade/rules/f_strings.rs @@ -1,4 +1,3 @@ -use ruff_macros::{define_violation, derive_message_formats}; use rustc_hash::FxHashMap; use rustpython_common::format::{ FieldName, FieldNamePart, FieldType, FormatPart, FormatString, FromTemplate, @@ -6,6 +5,9 @@ use rustpython_common::format::{ use rustpython_parser::ast::{Constant, Expr, ExprKind, KeywordData}; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; + +use ruff_macros::{define_violation, derive_message_formats}; use crate::ast::types::Range; use crate::checkers::ast::Checker; @@ -129,7 +131,7 @@ fn try_convert_to_f_string(checker: &Checker, expr: &Expr) -> Option { let contents = checker.locator.slice(&Range::from_located(value)); // Tokenize: we need to avoid trying to fix implicit string concatenations. - if lexer::make_tokenizer(contents) + if lexer::make_tokenizer(contents, Mode::Module) .flatten() .filter(|(_, tok, _)| matches!(tok, Tok::String { .. })) .count() diff --git a/crates/ruff/src/rules/pyupgrade/rules/native_literals.rs b/crates/ruff/src/rules/pyupgrade/rules/native_literals.rs index 3e36de150bf54..54911b6ba21c1 100644 --- a/crates/ruff/src/rules/pyupgrade/rules/native_literals.rs +++ b/crates/ruff/src/rules/pyupgrade/rules/native_literals.rs @@ -4,6 +4,7 @@ use ruff_macros::{define_violation, derive_message_formats}; use rustpython_parser::ast::{Constant, Expr, ExprKind, Keyword}; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; use serde::{Deserialize, Serialize}; use crate::ast::types::Range; @@ -118,7 +119,7 @@ pub fn native_literals( // safely remove the outer call in this situation. We're following pyupgrade // here and skip. let arg_code = checker.locator.slice(&Range::from_located(arg)); - if lexer::make_tokenizer(arg_code) + if lexer::make_tokenizer_located(arg_code, Mode::Module, arg.location) .flatten() .filter(|(_, tok, _)| matches!(tok, Tok::String { .. 
})) .count() diff --git a/crates/ruff/src/rules/pyupgrade/rules/outdated_version_block.rs b/crates/ruff/src/rules/pyupgrade/rules/outdated_version_block.rs index f532ab889da37..0f621274f9e3c 100644 --- a/crates/ruff/src/rules/pyupgrade/rules/outdated_version_block.rs +++ b/crates/ruff/src/rules/pyupgrade/rules/outdated_version_block.rs @@ -2,10 +2,12 @@ use std::cmp::Ordering; use log::error; use num_bigint::{BigInt, Sign}; -use ruff_macros::{define_violation, derive_message_formats}; use rustpython_parser::ast::{Cmpop, Constant, Expr, ExprKind, Located, Location, Stmt}; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; + +use ruff_macros::{define_violation, derive_message_formats}; use crate::ast::types::{Range, RefEquality}; use crate::ast::whitespace::indentation; @@ -67,7 +69,7 @@ fn metadata(locator: &Locator, located: &Located) -> Option let mut else_ = None; for (start, tok, _) in - lexer::make_tokenizer_located(text, Location::new(located.location.row(), 0)) + lexer::make_tokenizer_located(text, Mode::Module, Location::new(located.location.row(), 0)) .flatten() .filter(|(_, tok, _)| { !matches!( diff --git a/crates/ruff/src/rules/pyupgrade/rules/printf_string_formatting.rs b/crates/ruff/src/rules/pyupgrade/rules/printf_string_formatting.rs index ae5cac3e36d0b..20f114b477e59 100644 --- a/crates/ruff/src/rules/pyupgrade/rules/printf_string_formatting.rs +++ b/crates/ruff/src/rules/pyupgrade/rules/printf_string_formatting.rs @@ -1,14 +1,16 @@ use std::str::FromStr; -use ruff_macros::{define_violation, derive_message_formats}; -use ruff_python::identifiers::is_identifier; -use ruff_python::keyword::KWLIST; use rustpython_common::cformat::{ CConversionFlags, CFormatPart, CFormatPrecision, CFormatQuantity, CFormatString, }; use rustpython_parser::ast::{Constant, Expr, ExprKind, Location}; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; + +use ruff_macros::{define_violation, derive_message_formats}; +use ruff_python::identifiers::is_identifier; +use ruff_python::keyword::KWLIST; use crate::ast::types::Range; use crate::ast::whitespace::indentation; @@ -321,6 +323,7 @@ pub(crate) fn printf_string_formatting( let mut extension = None; for (start, tok, end) in lexer::make_tokenizer_located( checker.locator.slice(&Range::from_located(expr)), + Mode::Module, expr.location, ) .flatten() diff --git a/crates/ruff/src/rules/pyupgrade/rules/redundant_open_modes.rs b/crates/ruff/src/rules/pyupgrade/rules/redundant_open_modes.rs index 84c2eb3a580c5..2ea1dd7b5e3c8 100644 --- a/crates/ruff/src/rules/pyupgrade/rules/redundant_open_modes.rs +++ b/crates/ruff/src/rules/pyupgrade/rules/redundant_open_modes.rs @@ -5,6 +5,7 @@ use log::error; use ruff_macros::{define_violation, derive_message_formats}; use rustpython_parser::ast::{Constant, Expr, ExprKind, Keyword, Location}; use rustpython_parser::lexer; +use rustpython_parser::mode::Mode; use rustpython_parser::token::Tok; use crate::ast::helpers::find_keyword; @@ -142,7 +143,9 @@ fn create_remove_param_fix(locator: &Locator, expr: &Expr, mode_param: &Expr) -> let mut fix_end: Option = None; let mut is_first_arg: bool = false; let mut delete_first_arg: bool = false; - for (start, tok, end) in lexer::make_tokenizer_located(content, expr.location).flatten() { + for (start, tok, end) in + lexer::make_tokenizer_located(content, Mode::Module, expr.location).flatten() + { if start == mode_param.location { if is_first_arg { delete_first_arg = true; diff --git 
a/crates/ruff/src/rustpython_helpers.rs b/crates/ruff/src/rustpython_helpers.rs index eb498f7ebcb61..260fc51491401 100644 --- a/crates/ruff/src/rustpython_helpers.rs +++ b/crates/ruff/src/rustpython_helpers.rs @@ -7,7 +7,7 @@ use rustpython_parser::{lexer, parser}; /// Collect tokens up to and including the first error. pub fn tokenize(contents: &str) -> Vec { let mut tokens: Vec = vec![]; - for tok in lexer::make_tokenizer(contents) { + for tok in lexer::make_tokenizer(contents, Mode::Module) { let is_err = tok.is_err(); tokens.push(tok); if is_err { diff --git a/crates/ruff/src/source_code/indexer.rs b/crates/ruff/src/source_code/indexer.rs index 5a789f2d66df1..ef410eeffdb1a 100644 --- a/crates/ruff/src/source_code/indexer.rs +++ b/crates/ruff/src/source_code/indexer.rs @@ -51,13 +51,14 @@ impl From<&[LexResult]> for Indexer { mod tests { use rustpython_parser::lexer; use rustpython_parser::lexer::LexResult; + use rustpython_parser::mode::Mode; use crate::source_code::Indexer; #[test] fn continuation() { let contents = r#"x = 1"#; - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); let indexer: Indexer = lxr.as_slice().into(); assert_eq!(indexer.continuation_lines(), Vec::::new().as_slice()); @@ -69,7 +70,7 @@ x = 1 y = 2 "# .trim(); - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); let indexer: Indexer = lxr.as_slice().into(); assert_eq!(indexer.continuation_lines(), Vec::::new().as_slice()); @@ -89,7 +90,7 @@ if True: ) "# .trim(); - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); let indexer: Indexer = lxr.as_slice().into(); assert_eq!(indexer.continuation_lines(), [1, 5, 6, 11]); @@ -109,7 +110,7 @@ x = 1; \ import os "# .trim(); - let lxr: Vec = lexer::make_tokenizer(contents).collect(); + let lxr: Vec = lexer::make_tokenizer(contents, Mode::Module).collect(); let indexer: Indexer = lxr.as_slice().into(); assert_eq!(indexer.continuation_lines(), [9, 12]); } diff --git a/crates/ruff/src/source_code/stylist.rs b/crates/ruff/src/source_code/stylist.rs index ff021d3cb0e7d..34692a2695b53 100644 --- a/crates/ruff/src/source_code/stylist.rs +++ b/crates/ruff/src/source_code/stylist.rs @@ -7,6 +7,7 @@ use once_cell::unsync::OnceCell; use rustpython_parser::ast::Location; use rustpython_parser::lexer; use rustpython_parser::lexer::Tok; +use rustpython_parser::mode::Mode; use crate::ast::types::Range; use crate::rules::pydocstyle::helpers::leading_quote; @@ -165,7 +166,7 @@ impl Deref for LineEnding { /// Detect the indentation style of the given tokens. fn detect_indentation(contents: &str, locator: &Locator) -> Option { - for (_start, tok, end) in lexer::make_tokenizer(contents).flatten() { + for (_start, tok, end) in lexer::make_tokenizer(contents, Mode::Module).flatten() { if let Tok::Indent { .. } = tok { let start = Location::new(end.row(), 0); let whitespace = locator.slice(&Range::new(start, end)); @@ -177,7 +178,7 @@ fn detect_indentation(contents: &str, locator: &Locator) -> Option /// Detect the quotation style of the given tokens. fn detect_quote(contents: &str, locator: &Locator) -> Option { - for (start, tok, end) in lexer::make_tokenizer(contents).flatten() { + for (start, tok, end) in lexer::make_tokenizer(contents, Mode::Module).flatten() { if let Tok::String { .. 
} = tok {
             let content = locator.slice(&Range::new(start, end));
             if let Some(pattern) = leading_quote(content) {
diff --git a/crates/ruff_dev/src/print_tokens.rs b/crates/ruff_dev/src/print_tokens.rs
index 3c730cedf812a..b10e2787ced89 100644
--- a/crates/ruff_dev/src/print_tokens.rs
+++ b/crates/ruff_dev/src/print_tokens.rs
@@ -6,6 +6,7 @@ use std::path::PathBuf;
 
 use anyhow::Result;
 use rustpython_parser::lexer;
+use rustpython_parser::mode::Mode;
 
 #[derive(clap::Args)]
 pub struct Args {
@@ -16,7 +17,7 @@ pub struct Args {
 
 pub fn main(args: &Args) -> Result<()> {
     let contents = fs::read_to_string(&args.file)?;
-    for (_, tok, _) in lexer::make_tokenizer(&contents).flatten() {
+    for (_, tok, _) in lexer::make_tokenizer(&contents, Mode::Module).flatten() {
         println!("{tok:#?}");
     }
     Ok(())
diff --git a/crates/ruff_python_formatter/src/core/rustpython_helpers.rs b/crates/ruff_python_formatter/src/core/rustpython_helpers.rs
index eb498f7ebcb61..260fc51491401 100644
--- a/crates/ruff_python_formatter/src/core/rustpython_helpers.rs
+++ b/crates/ruff_python_formatter/src/core/rustpython_helpers.rs
@@ -7,7 +7,7 @@ use rustpython_parser::{lexer, parser};
 /// Collect tokens up to and including the first error.
 pub fn tokenize(contents: &str) -> Vec<LexResult> {
     let mut tokens: Vec<LexResult> = vec![];
-    for tok in lexer::make_tokenizer(contents) {
+    for tok in lexer::make_tokenizer(contents, Mode::Module) {
         let is_err = tok.is_err();
         tokens.push(tok);
         if is_err {
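
The change repeated at every call site above is the same one: rustpython-parser's `make_tokenizer` and `make_tokenizer_located` now take an explicit lexing `Mode`, and Ruff passes `Mode::Module` so that soft keywords such as `match` are only treated as keywords where the grammar allows it (the new E70.py fixture, `match: Optional[Match] = None`, is a plain annotated assignment and must not be lexed as a `match` statement). Below is a minimal sketch of the updated call pattern, assuming the rustpython-parser revision pinned in Cargo.toml above; the helper `comment_ranges` is illustrative and not part of this patch.

    use rustpython_parser::ast::Location;
    use rustpython_parser::lexer::{self, Tok};
    use rustpython_parser::mode::Mode;

    /// Illustrative helper (not part of this patch): collect the range of every
    /// comment token, lexing `contents` as a module starting at `start`, in the
    /// same way the patched call sites do.
    fn comment_ranges(contents: &str, start: Location) -> Vec<(Location, Location)> {
        lexer::make_tokenizer_located(contents, Mode::Module, start)
            .flatten() // skip lexer errors, mirroring the `.flatten()` calls above
            .filter_map(|(begin, tok, end)| match tok {
                Tok::Comment(..) => Some((begin, end)),
                _ => None,
            })
            .collect()
    }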