diff --git a/Cargo.lock b/Cargo.lock index f7333fa504d9a7..db8cdef9665e9c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1818,7 +1818,6 @@ dependencies = [ "chrono", "clap", "colored", - "criterion", "dirs 5.0.1", "fern", "glob", @@ -1873,7 +1872,6 @@ dependencies = [ "thiserror", "toml", "typed-arena", - "unicase", "unicode-width", "unicode_names2", ] @@ -2888,15 +2886,6 @@ dependencies = [ "unic-common", ] -[[package]] -name = "unicase" -version = "2.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "50f37be617794602aabbeee0be4f259dc1778fabe05e2d67ee8f79326d5cb4f6" -dependencies = [ - "version_check", -] - [[package]] name = "unicode-bidi" version = "0.3.13" diff --git a/crates/ruff/Cargo.toml b/crates/ruff/Cargo.toml index 205ab65cd0106c..7503cdddd55acb 100644 --- a/crates/ruff/Cargo.toml +++ b/crates/ruff/Cargo.toml @@ -27,7 +27,7 @@ ruff_rustpython = { path = "../ruff_rustpython" } ruff_text_size = { workspace = true } ruff_textwrap = { path = "../ruff_textwrap" } -aho-corasick = {version = "1.0.2"} +aho-corasick = { version = "1.0.2" } annotate-snippets = { version = "0.9.1", features = ["color"] } anyhow = { workspace = true } bitflags = { workspace = true } @@ -79,8 +79,6 @@ toml = { workspace = true } typed-arena = { version = "2.0.2" } unicode-width = { version = "0.1.10" } unicode_names2 = { version = "0.6.0", git = "https://github.com/youknowone/unicode_names2.git", rev = "4ce16aa85cbcdd9cc830410f1a72ef9a235f2fde" } -unicase = "2.6.0" -criterion = "0.5.1" [dev-dependencies] insta = { workspace = true } @@ -94,7 +92,3 @@ default = [] schemars = ["dep:schemars"] # Enables the UnreachableCode rule unreachable-code = [] - -[[bench]] -name = "benchmark" -harness = false diff --git a/crates/ruff/benches/benchmark.rs b/crates/ruff/benches/benchmark.rs deleted file mode 100644 index 0f26b2ffa84374..00000000000000 --- a/crates/ruff/benches/benchmark.rs +++ /dev/null @@ -1,58 +0,0 @@ -use criterion::{black_box, criterion_group, criterion_main, BenchmarkId, Criterion}; -use ruff_text_size::TextSize; - -use ruff::noqa::{Directive, ParsedFileExemption}; - -pub fn directive_benchmark(c: &mut Criterion) { - let mut group = c.benchmark_group("Directive"); - // for i in [ - // "# noqa: F401", - // "# noqa: F401, F841", - // "# noqa", - // "# type: ignore # noqa: E501", - // "# type: ignore # nosec", - // "# some very long comment that # is interspersed with characters but # no directive", - // ] - // .iter() - // { - // group.bench_with_input(BenchmarkId::new("Regex", i), i, |b, _i| { - // b.iter(|| Directive::try_extract(black_box(i), TextSize::default())) - // }); - // group.bench_with_input(BenchmarkId::new("Find", i), i, |b, _i| { - // b.iter(|| Directive::try_parse(black_box(i), TextSize::default())) - // }); - // group.bench_with_input(BenchmarkId::new("AhoCorasick", i), i, |b, _i| { - // b.iter(|| Directive::try_parse_aho_corasick(black_box(i), TextSize::default())) - // }); - // group.bench_with_input(BenchmarkId::new("Memchr", i), i, |b, _i| { - // b.iter(|| Directive::try_parse_memchr(black_box(i), TextSize::default())) - // }); - // } - - for i in [ - "# ruff: noqa", - "# flake8: NOQA", - "# noqa: F401, F841", - "# noqa", - "# type: ignore # noqa: E501", - "# type: ignore # nosec", - "# some very long comment that # is interspersed with characters but # no directive", - ] - .iter() - { - group.bench_with_input(BenchmarkId::new("Regex", i), i, |b, _i| { - b.iter(|| ParsedFileExemption::extract(black_box(i))) - }); - group.bench_with_input(BenchmarkId::new("Parser", i), i, |b, _i| { - b.iter(|| ParsedFileExemption::parse(black_box(i))) - }); - group.bench_with_input(BenchmarkId::new("Matches", i), i, |b, _i| { - b.iter(|| ParsedFileExemption::matches(black_box(i))) - }); - } - - group.finish(); -} - -criterion_group!(benches, directive_benchmark); -criterion_main!(benches); diff --git a/crates/ruff/src/lib.rs b/crates/ruff/src/lib.rs index 155414c1bcd918..2a69194fad4e98 100644 --- a/crates/ruff/src/lib.rs +++ b/crates/ruff/src/lib.rs @@ -27,7 +27,7 @@ pub mod line_width; pub mod linter; pub mod logging; pub mod message; -pub mod noqa; +mod noqa; pub mod packaging; pub mod pyproject_toml; pub mod registry; diff --git a/crates/ruff/src/noqa.rs b/crates/ruff/src/noqa.rs index abd69f1dfc3f90..b0ab2064c6ec56 100644 --- a/crates/ruff/src/noqa.rs +++ b/crates/ruff/src/noqa.rs @@ -11,7 +11,6 @@ use log::warn; use once_cell::sync::Lazy; use ruff_text_size::{TextLen, TextRange, TextSize}; use rustpython_parser::ast::Ranged; -use unicase::UniCase; use ruff_diagnostics::Diagnostic; use ruff_python_ast::source_code::Locator; @@ -31,7 +30,7 @@ static NOQA_MATCHER: Lazy = Lazy::new(|| { /// A directive to ignore a set of rules for a given line of Python source code (e.g., /// `# noqa: F401, F841`). #[derive(Debug)] -pub enum Directive<'a> { +pub(crate) enum Directive<'a> { /// The `noqa` directive ignores all rules (e.g., `# noqa`). All(All), /// The `noqa` directive ignores specific rules (e.g., `# noqa: F401, F841`). @@ -40,23 +39,25 @@ pub enum Directive<'a> { impl<'a> Directive<'a> { /// Extract the noqa `Directive` from a line of Python source code. - pub fn try_extract(text: &'a str, offset: TextSize) -> Option { + pub(crate) fn try_extract(text: &'a str, offset: TextSize) -> Option { for mat in NOQA_MATCHER.find_iter(text) { - let mut comment_start = mat.start(); + let noqa_literal_start = mat.start(); + + // Determine the start of the comment. + let mut comment_start = noqa_literal_start; // Trim any whitespace between the `#` character and the `noqa` literal. comment_start -= text[..comment_start].len() - text[..comment_start].trim_end().len(); // The next character has to be the `#` character. - if !text[..comment_start].ends_with('#') { + if text[..comment_start].ends_with('#') { + comment_start -= 1; + } else { continue; } - // The start of the `#` character. - comment_start -= 1; - - // If the next character is `:`, then it's a list of codes. Otherwise, it's an `all` - // directive. + // If the next character is `:`, then it's a list of codes. Otherwise, it's a directive + // to ignore all rules. let noqa_literal_end = mat.end(); return Some(if text[noqa_literal_end..].starts_with(':') { // E.g., `# noqa: F401, F841`. @@ -116,7 +117,9 @@ impl<'a> Directive<'a> { /// Lex an individual rule code (e.g., `F401`). fn lex_code(text: &str) -> Option<&str> { + // Extract, e.g., the `F` in `F401`. let prefix = text.chars().take_while(char::is_ascii_uppercase).count(); + // Extract, e.g., the `401` in `F401`. let suffix = text[prefix..] .chars() .take_while(char::is_ascii_digit) @@ -130,7 +133,7 @@ impl<'a> Directive<'a> { } #[derive(Debug)] -pub struct All { +pub(crate) struct All { range: TextRange, } @@ -142,7 +145,7 @@ impl Ranged for All { } #[derive(Debug)] -pub struct Codes<'a> { +pub(crate) struct Codes<'a> { range: TextRange, codes: Vec<&'a str>, } @@ -233,7 +236,7 @@ impl FileExemption { /// [`FileExemption`], but only for a single line, as opposed to an aggregated set of exemptions /// across a source file. #[derive(Debug)] -pub enum ParsedFileExemption<'a> { +pub(crate) enum ParsedFileExemption<'a> { /// No file-level exemption was found. None, /// The file-level exemption ignores all rules (e.g., `# ruff: noqa`). @@ -244,77 +247,7 @@ pub enum ParsedFileExemption<'a> { impl<'a> ParsedFileExemption<'a> { /// Return a [`ParsedFileExemption`] for a given comment line. - pub fn parse(line: &'a str) -> Self { - let line = line.trim_whitespace_start(); - - if line.len() >= "# flake8: noqa".len() { - if UniCase::new(&line[.."# flake8: noqa".len()]) == UniCase::new("# flake8: noqa") { - return Self::All; - } - } - - if line.len() >= "# ruff: noqa".len() { - if UniCase::new(&line[.."# ruff: noqa".len()]) == UniCase::new("# ruff: noqa") { - let remainder = &line["# ruff: noqa".len()..]; - if remainder.is_empty() { - return Self::All; - } else if let Some(codes) = remainder.strip_prefix(':') { - let codes = codes - .split(|c: char| c.is_whitespace() || c == ',') - .map(str::trim) - .filter(|code| !code.is_empty()) - .collect_vec(); - if codes.is_empty() { - warn!("Expected rule codes on `noqa` directive: \"{line}\""); - } - return Self::Codes(codes); - } - } - } - - Self::None - } - - /// Return a [`ParsedFileExemption`] for a given comment line. - pub fn matches(line: &'a str) -> Self { - let line = line.trim_whitespace_start(); - - if line.len() >= "# flake8: noqa".len() { - if matches!( - &line[.."# flake8: noqa".len()], - "# flake8: noqa" | "# flake8: NOQA" | "# flake8: NoQA" - ) { - return Self::All; - } - } - - if line.len() >= "# ruff: noqa".len() { - if matches!( - &line[.."# ruff: noqa".len()], - "# ruff: noqa" | "# ruff: NOQA" | "# ruff: NoQA" - ) { - let remainder = &line["# ruff: noqa".len()..]; - if remainder.is_empty() { - return Self::All; - } else if let Some(codes) = remainder.strip_prefix(':') { - let codes = codes - .split(|c: char| c.is_whitespace() || c == ',') - .map(str::trim) - .filter(|code| !code.is_empty()) - .collect_vec(); - if codes.is_empty() { - warn!("Expected rule codes on `noqa` directive: \"{line}\""); - } - return Self::Codes(codes); - } - } - } - - Self::None - } - - /// Return a [`ParsedFileExemption`] for a given comment line. - pub fn extract(line: &'a str) -> Self { + pub(crate) fn extract(line: &'a str) -> Self { let line = line.trim_whitespace_start(); if line.starts_with("# flake8: noqa") @@ -621,7 +554,7 @@ impl NoqaMapping { } /// Returns the re-mapped position or `position` if no mapping exists. - pub fn resolve(&self, offset: TextSize) -> TextSize { + pub(crate) fn resolve(&self, offset: TextSize) -> TextSize { let index = self.ranges.binary_search_by(|range| { if range.end() < offset { std::cmp::Ordering::Less @@ -639,7 +572,7 @@ impl NoqaMapping { } } - pub fn push_mapping(&mut self, range: TextRange) { + pub(crate) fn push_mapping(&mut self, range: TextRange) { if let Some(last_range) = self.ranges.last_mut() { // Strictly sorted insertion if last_range.end() <= range.start() {