From 461cdad53a1569d6b6dfa242734b140bcad1b7b7 Mon Sep 17 00:00:00 2001 From: Charlie Marsh Date: Wed, 6 Mar 2024 20:33:19 -0800 Subject: [PATCH] Avoid repeating function calls in f-string conversions (#10265) ## Summary Given a format string like `"{x} {x}".format(x=foo())`, we should avoid converting to an f-string, since doing so would require repeating the function call (`f"{foo()} {foo()}"`), which could introduce side effects. Closes https://github.com/astral-sh/ruff/issues/10258. --- .../test/fixtures/pyupgrade/UP032_0.py | 7 + .../src/rules/pyupgrade/rules/f_strings.rs | 286 +++++++++++------- ...__rules__pyupgrade__tests__UP032_0.py.snap | 20 ++ 3 files changed, 196 insertions(+), 117 deletions(-) diff --git a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP032_0.py b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP032_0.py index 86067d96f4e6d..09ea36db61943 100644 --- a/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP032_0.py +++ b/crates/ruff_linter/resources/test/fixtures/pyupgrade/UP032_0.py @@ -252,3 +252,10 @@ async def c(): # The dictionary should be parenthesized. "{}".format({0: 1}()) + +# The string shouldn't be converted, since it would require repeating the function call. +"{x} {x}".format(x=foo()) +"{0} {0}".format(foo()) + +# The string _should_ be converted, since the function call is repeated in the arguments. +"{0} {1}".format(foo(), foo()) diff --git a/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs b/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs index 30c1fdb829843..05dbe6e26fc00 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs +++ b/crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs @@ -1,10 +1,11 @@ use std::borrow::Cow; use anyhow::{Context, Result}; -use rustc_hash::FxHashMap; +use rustc_hash::{FxHashMap, FxHashSet}; use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation}; use ruff_macros::{derive_message_formats, violation}; +use ruff_python_ast::helpers::any_over_expr; use ruff_python_ast::str::{leading_quote, trailing_quote}; use ruff_python_ast::{self as ast, Expr, Keyword}; use ruff_python_literal::format::{ @@ -56,7 +57,7 @@ impl Violation for FString { } /// Like [`FormatSummary`], but maps positional and keyword arguments to their -/// values. For example, given `{a} {b}".format(a=1, b=2)`, `FormatFunction` +/// values. For example, given `{a} {b}".format(a=1, b=2)`, [`FormatSummary`] /// would include `"a"` and `'b'` in `kwargs`, mapped to `1` and `2` /// respectively. #[derive(Debug)] @@ -106,11 +107,11 @@ impl<'a> FormatSummaryValues<'a> { }) } - /// Return the next positional argument. - fn arg_auto(&mut self) -> Option<&Expr> { + /// Return the next positional index. + fn arg_auto(&mut self) -> usize { let idx = self.auto_index; self.auto_index += 1; - self.arg_positional(idx) + idx } /// Return the positional argument at the given index. @@ -216,130 +217,178 @@ fn formatted_expr<'a>(expr: &Expr, context: FormatContext, locator: &Locator<'a> } } -/// Convert a string `.format` call to an f-string. -/// -/// Returns `None` if the string does not require conversion, and `Err` if the conversion -/// is not possible. -fn try_convert_to_f_string( - range: TextRange, - summary: &mut FormatSummaryValues, - locator: &Locator, -) -> Result> { - let contents = locator.slice(range); - - // Strip the unicode prefix. It's redundant in Python 3, and invalid when used - // with f-strings. - let contents = if contents.starts_with('U') || contents.starts_with('u') { - &contents[1..] - } else { - contents - }; +#[derive(Debug, Clone)] +enum FStringConversion { + /// The format string only contains literal parts. + Literal, + /// The format call uses arguments with side effects which are repeated within the + /// format string. For example: `"{x} {x}".format(x=foo())`. + SideEffects, + /// The format string should be converted to an f-string. + Convert(String), +} - // Temporarily strip the raw prefix, if present. It will be prepended to the result, before the - // 'f', to match the prefix order both the Ruff formatter (and Black) use when formatting code. - let raw = contents.starts_with('R') || contents.starts_with('r'); - let contents = if raw { &contents[1..] } else { contents }; - - // Remove the leading and trailing quotes. - let leading_quote = leading_quote(contents).context("Unable to identify leading quote")?; - let trailing_quote = trailing_quote(contents).context("Unable to identify trailing quote")?; - let contents = &contents[leading_quote.len()..contents.len() - trailing_quote.len()]; - if contents.is_empty() { - return Ok(None); - } +#[derive(Debug, Clone, Eq, PartialEq, Hash)] +enum IndexOrKeyword { + /// The field uses a positional index. + Index(usize), + /// The field uses a keyword name. + Keyword(String), +} - // Parse the format string. - let format_string = FormatString::from_str(contents)?; +impl FStringConversion { + /// Convert a string `.format` call to an f-string. + fn try_convert( + range: TextRange, + summary: &mut FormatSummaryValues, + locator: &Locator, + ) -> Result { + let contents = locator.slice(range); + + // Strip the unicode prefix. It's redundant in Python 3, and invalid when used + // with f-strings. + let contents = if contents.starts_with('U') || contents.starts_with('u') { + &contents[1..] + } else { + contents + }; + + // Temporarily strip the raw prefix, if present. It will be prepended to the result, before the + // 'f', to match the prefix order both the Ruff formatter (and Black) use when formatting code. + let raw = contents.starts_with('R') || contents.starts_with('r'); + let contents = if raw { &contents[1..] } else { contents }; + + // Remove the leading and trailing quotes. + let leading_quote = leading_quote(contents).context("Unable to identify leading quote")?; + let trailing_quote = + trailing_quote(contents).context("Unable to identify trailing quote")?; + let contents = &contents[leading_quote.len()..contents.len() - trailing_quote.len()]; + + // If the format string is empty, it doesn't need to be converted. + if contents.is_empty() { + return Ok(Self::Literal); + } - if format_string - .format_parts - .iter() - .all(|part| matches!(part, FormatPart::Literal(..))) - { - return Ok(None); - } + // Parse the format string. + let format_string = FormatString::from_str(contents)?; - let mut converted = String::with_capacity(contents.len()); - for part in format_string.format_parts { - match part { - FormatPart::Field { - field_name, - conversion_spec, - format_spec, - } => { - converted.push('{'); - - let field = FieldName::parse(&field_name)?; - let arg = match field.field_type { - FieldType::Auto => summary.arg_auto(), - FieldType::Index(index) => summary.arg_positional(index), - FieldType::Keyword(name) => summary.arg_keyword(&name), - } - .context("Unable to parse field")?; - converted.push_str(&formatted_expr( - arg, - if field.parts.is_empty() { - FormatContext::Bare - } else { - FormatContext::Accessed - }, - locator, - )); - - for part in field.parts { - match part { - FieldNamePart::Attribute(name) => { - converted.push('.'); - converted.push_str(&name); + // If the format string contains only literal parts, it doesn't need to be converted. + if format_string + .format_parts + .iter() + .all(|part| matches!(part, FormatPart::Literal(..))) + { + return Ok(Self::Literal); + } + + let mut converted = String::with_capacity(contents.len()); + let mut seen = FxHashSet::default(); + for part in format_string.format_parts { + match part { + FormatPart::Field { + field_name, + conversion_spec, + format_spec, + } => { + converted.push('{'); + + let field = FieldName::parse(&field_name)?; + + // Map from field type to specifier. + let specifier = match field.field_type { + FieldType::Auto => IndexOrKeyword::Index(summary.arg_auto()), + FieldType::Index(index) => IndexOrKeyword::Index(index), + FieldType::Keyword(name) => IndexOrKeyword::Keyword(name), + }; + + let arg = match &specifier { + IndexOrKeyword::Index(index) => { + summary.arg_positional(*index).ok_or_else(|| { + anyhow::anyhow!("Positional argument {index} is missing") + })? } - FieldNamePart::Index(index) => { - converted.push('['); - converted.push_str(index.to_string().as_str()); - converted.push(']'); + IndexOrKeyword::Keyword(name) => { + summary.arg_keyword(name).ok_or_else(|| { + anyhow::anyhow!("Keyword argument '{name}' is missing") + })? } - FieldNamePart::StringIndex(index) => { - let quote = match trailing_quote { - "'" | "'''" | "\"\"\"" => '"', - "\"" => '\'', - _ => unreachable!("invalid trailing quote"), - }; - converted.push('['); - converted.push(quote); - converted.push_str(&index); - converted.push(quote); - converted.push(']'); + }; + + // If the argument contains a side effect, and it's repeated in the format + // string, we can't convert the format string to an f-string. For example, + // converting `"{x} {x}".format(x=foo())` would result in `f"{foo()} {foo()}"`, + // which would call `foo()` twice. + if !seen.insert(specifier) { + if any_over_expr(arg, &Expr::is_call_expr) { + return Ok(Self::SideEffects); } } - } - if let Some(conversion_spec) = conversion_spec { - converted.push('!'); - converted.push(conversion_spec); - } + converted.push_str(&formatted_expr( + arg, + if field.parts.is_empty() { + FormatContext::Bare + } else { + FormatContext::Accessed + }, + locator, + )); + + for part in field.parts { + match part { + FieldNamePart::Attribute(name) => { + converted.push('.'); + converted.push_str(&name); + } + FieldNamePart::Index(index) => { + converted.push('['); + converted.push_str(index.to_string().as_str()); + converted.push(']'); + } + FieldNamePart::StringIndex(index) => { + let quote = match trailing_quote { + "'" | "'''" | "\"\"\"" => '"', + "\"" => '\'', + _ => unreachable!("invalid trailing quote"), + }; + converted.push('['); + converted.push(quote); + converted.push_str(&index); + converted.push(quote); + converted.push(']'); + } + } + } - if !format_spec.is_empty() { - converted.push(':'); - converted.push_str(&format_spec); - } + if let Some(conversion_spec) = conversion_spec { + converted.push('!'); + converted.push(conversion_spec); + } - converted.push('}'); - } - FormatPart::Literal(value) => { - converted.push_str(&curly_escape(&value)); + if !format_spec.is_empty() { + converted.push(':'); + converted.push_str(&format_spec); + } + + converted.push('}'); + } + FormatPart::Literal(value) => { + converted.push_str(&curly_escape(&value)); + } } } - } - // Construct the format string. - let mut contents = String::with_capacity(usize::from(raw) + 1 + converted.len()); - if raw { - contents.push('r'); + // Construct the format string. + let mut contents = String::with_capacity(usize::from(raw) + 1 + converted.len()); + if raw { + contents.push('r'); + } + contents.push('f'); + contents.push_str(leading_quote); + contents.push_str(&converted); + contents.push_str(trailing_quote); + Ok(Self::Convert(contents)) } - contents.push('f'); - contents.push_str(leading_quote); - contents.push_str(&converted); - contents.push_str(trailing_quote); - Ok(Some(contents)) } /// UP032 @@ -389,13 +438,16 @@ pub(crate) fn f_strings( break range.start(); } Some((Tok::String { .. }, range)) => { - match try_convert_to_f_string(range, &mut summary, checker.locator()) { - Ok(Some(fstring)) => patches.push((range, fstring)), + match FStringConversion::try_convert(range, &mut summary, checker.locator()) { + Ok(FStringConversion::Convert(fstring)) => patches.push((range, fstring)), // Convert escaped curly brackets e.g. `{{` to `{` in literal string parts - Ok(None) => patches.push(( + Ok(FStringConversion::Literal) => patches.push(( range, curly_unescape(checker.locator().slice(range)).to_string(), )), + // If the format string contains side effects that would need to be repeated, + // we can't convert it to an f-string. + Ok(FStringConversion::SideEffects) => return, // If any of the segments fail to convert, then we can't convert the entire // expression. Err(_) => return, diff --git a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP032_0.py.snap b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP032_0.py.snap index 305e2ee42f0dc..03d18e32eba16 100644 --- a/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP032_0.py.snap +++ b/crates/ruff_linter/src/rules/pyupgrade/snapshots/ruff_linter__rules__pyupgrade__tests__UP032_0.py.snap @@ -1256,6 +1256,8 @@ UP032_0.py:254:1: UP032 [*] Use f-string instead of `format` call 253 | # The dictionary should be parenthesized. 254 | "{}".format({0: 1}()) | ^^^^^^^^^^^^^^^^^^^^^ UP032 +255 | +256 | # The string shouldn't be converted, since it would require repeating the function call. | = help: Convert to f-string @@ -1265,3 +1267,21 @@ UP032_0.py:254:1: UP032 [*] Use f-string instead of `format` call 253 253 | # The dictionary should be parenthesized. 254 |-"{}".format({0: 1}()) 254 |+f"{({0: 1}())}" +255 255 | +256 256 | # The string shouldn't be converted, since it would require repeating the function call. +257 257 | "{x} {x}".format(x=foo()) + +UP032_0.py:261:1: UP032 [*] Use f-string instead of `format` call + | +260 | # The string _should_ be converted, since the function call is repeated in the arguments. +261 | "{0} {1}".format(foo(), foo()) + | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP032 + | + = help: Convert to f-string + +ℹ Safe fix +258 258 | "{0} {0}".format(foo()) +259 259 | +260 260 | # The string _should_ be converted, since the function call is repeated in the arguments. +261 |-"{0} {1}".format(foo(), foo()) + 261 |+f"{foo()} {foo()}"