From 538789c9d4017d6c34dce2c0c0416c5f9e0d819d Mon Sep 17 00:00:00 2001 From: Micha Reiser Date: Tue, 6 Feb 2024 17:29:54 -0500 Subject: [PATCH] Try to improve trailing comma perf --- .../flake8_commas/rules/trailing_commas.rs | 141 ++++++++++-------- 1 file changed, 78 insertions(+), 63 deletions(-) diff --git a/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs b/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs index f0dc92b6001bb2..aa9dd703d4ea10 100644 --- a/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs +++ b/crates/ruff_linter/src/rules/flake8_commas/rules/trailing_commas.rs @@ -1,10 +1,8 @@ -use itertools::Itertools; - use ruff_diagnostics::{AlwaysFixableViolation, Violation}; use ruff_diagnostics::{Diagnostic, Edit, Fix}; use ruff_macros::{derive_message_formats, violation}; use ruff_python_index::Indexer; -use ruff_python_parser::lexer::{LexResult, Spanned}; +use ruff_python_parser::lexer::LexResult; use ruff_python_parser::Tok; use ruff_source_file::Locator; use ruff_text_size::{Ranged, TextRange}; @@ -12,20 +10,20 @@ use ruff_text_size::{Ranged, TextRange}; /// Simplified token type. #[derive(Copy, Clone, PartialEq, Eq)] enum TokenType { - Irrelevant, - NonLogicalNewline, + Named, + String, Newline, - Comma, + NonLogicalNewline, OpeningBracket, + ClosingBracket, OpeningSquareBracket, + Colon, + Comma, OpeningCurlyBracket, - ClosingBracket, - For, - Named, Def, + For, Lambda, - Colon, - String, + Irrelevant, } /// Simplified token specialized for the task. @@ -54,30 +52,30 @@ impl Token { } } -impl From<&Spanned> for Token { - fn from(spanned: &Spanned) -> Self { - let r#type = match &spanned.0 { - Tok::NonLogicalNewline => TokenType::NonLogicalNewline, - Tok::Newline => TokenType::Newline, - Tok::For => TokenType::For, - Tok::Def => TokenType::Def, - Tok::Lambda => TokenType::Lambda, - // Import treated like a function. - Tok::Import => TokenType::Named, +impl From<(&Tok, TextRange)> for Token { + fn from((tok, range): (&Tok, TextRange)) -> Self { + let r#type = match tok { Tok::Name { .. } => TokenType::Named, Tok::String { .. } => TokenType::String, - Tok::Comma => TokenType::Comma, + Tok::Newline => TokenType::Newline, + Tok::NonLogicalNewline => TokenType::NonLogicalNewline, Tok::Lpar => TokenType::OpeningBracket, + Tok::Rpar => TokenType::ClosingBracket, Tok::Lsqb => TokenType::OpeningSquareBracket, - Tok::Lbrace => TokenType::OpeningCurlyBracket, - Tok::Rpar | Tok::Rsqb | Tok::Rbrace => TokenType::ClosingBracket, + Tok::Rsqb => TokenType::ClosingBracket, Tok::Colon => TokenType::Colon, + Tok::Comma => TokenType::Comma, + Tok::Lbrace => TokenType::OpeningCurlyBracket, + Tok::Rbrace => TokenType::ClosingBracket, + Tok::Def => TokenType::Def, + Tok::For => TokenType::For, + Tok::Lambda => TokenType::Lambda, + // Import treated like a function. + Tok::Import => TokenType::Named, _ => TokenType::Irrelevant, }; - Self { - range: spanned.1, - r#type, - } + #[allow(clippy::inconsistent_struct_constructor)] + Self { range, r#type } } } @@ -237,10 +235,12 @@ pub(crate) fn trailing_commas( indexer: &Indexer, ) { let mut fstrings = 0u32; - let tokens = tokens - .iter() - .flatten() - .filter_map(|spanned @ (tok, tok_range)| match tok { + let tokens = tokens.iter().filter_map(|result| { + let Ok((tok, tok_range)) = result else { + return None; + }; + + match tok { // Completely ignore comments -- they just interfere with the logic. Tok::Comment(_) => None, // F-strings are handled as `String` token type with the complete range @@ -263,69 +263,81 @@ pub(crate) fn trailing_commas( } _ => { if fstrings == 0 { - Some(Token::from(spanned)) + Some(Token::from((tok, *tok_range))) } else { None } } - }); - let tokens = [Token::irrelevant(), Token::irrelevant()] - .into_iter() - .chain(tokens); - // Collapse consecutive newlines to the first one -- trailing commas are - // added before the first newline. - let tokens = tokens.coalesce(|previous, current| { - if previous.r#type == TokenType::NonLogicalNewline - && current.r#type == TokenType::NonLogicalNewline - { - Ok(previous) - } else { - Err((previous, current)) } }); - // The current nesting of the comma contexts. + let mut prev = Token::irrelevant(); + let mut prev_prev = Token::irrelevant(); + let mut stack = vec![Context::new(ContextType::No)]; - for (prev_prev, prev, token) in tokens.tuple_windows() { + for token in tokens { + if prev.r#type == TokenType::NonLogicalNewline + && token.r#type == TokenType::NonLogicalNewline + { + // Collapse consecutive newlines to the first one -- trailing commas are + // added before the first newline. + continue; + } + // Update the comma context stack. - match token.r#type { + let context = match token.r#type { TokenType::OpeningBracket => match (prev.r#type, prev_prev.r#type) { (TokenType::Named, TokenType::Def) => { - stack.push(Context::new(ContextType::FunctionParameters)); + let context = Context::new(ContextType::FunctionParameters); + stack.push(context); + context } (TokenType::Named | TokenType::ClosingBracket, _) => { - stack.push(Context::new(ContextType::CallArguments)); + let context = Context::new(ContextType::CallArguments); + stack.push(context); + context } _ => { - stack.push(Context::new(ContextType::Tuple)); + let context = Context::new(ContextType::Tuple); + stack.push(context); + context } }, TokenType::OpeningSquareBracket => match prev.r#type { TokenType::ClosingBracket | TokenType::Named | TokenType::String => { - stack.push(Context::new(ContextType::Subscript)); + let context = Context::new(ContextType::Subscript); + stack.push(context); + context } _ => { - stack.push(Context::new(ContextType::List)); + let context = Context::new(ContextType::List); + stack.push(context); + context } }, TokenType::OpeningCurlyBracket => { - stack.push(Context::new(ContextType::Dict)); + let context = Context::new(ContextType::Dict); + stack.push(context); + context } TokenType::Lambda => { - stack.push(Context::new(ContextType::LambdaParameters)); + let context = Context::new(ContextType::LambdaParameters); + stack.push(context); + context } TokenType::For => { - let len = stack.len(); - stack[len - 1] = Context::new(ContextType::No); + let last = stack.last_mut().expect("Stack to never be empty"); + *last = Context::new(ContextType::No); + *last } TokenType::Comma => { - let len = stack.len(); - stack[len - 1].inc(); + let last = stack.last_mut().expect("Stack to never be empty"); + last.inc(); + *last } - _ => {} - } - let context = &stack[stack.len() - 1]; + _ => stack.last().copied().expect("Stack to never be empty"), + }; // Is it allowed to have a trailing comma before this token? let comma_allowed = token.r#type == TokenType::ClosingBracket @@ -412,5 +424,8 @@ pub(crate) fn trailing_commas( if pop_context && stack.len() > 1 { stack.pop(); } + + prev_prev = prev; + prev = token; } }