Skip to content

Commit

Permalink
Avoid repeating function calls in f-string conversions (#10265)
Browse files Browse the repository at this point in the history
## Summary

Given a format string like `"{x} {x}".format(x=foo())`, we should avoid
converting to an f-string, since doing so would require repeating the
function call (`f"{foo()} {foo()}"`), which could introduce side
effects.

Closes #10258.
  • Loading branch information
charliermarsh committed Mar 7, 2024
1 parent b9264a5 commit 461cdad
Show file tree
Hide file tree
Showing 3 changed files with 196 additions and 117 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -252,3 +252,10 @@ async def c():

# The dictionary should be parenthesized.
"{}".format({0: 1}())

# The string shouldn't be converted, since it would require repeating the function call.
"{x} {x}".format(x=foo())
"{0} {0}".format(foo())

# The string _should_ be converted, since the function call is repeated in the arguments.
"{0} {1}".format(foo(), foo())
286 changes: 169 additions & 117 deletions crates/ruff_linter/src/rules/pyupgrade/rules/f_strings.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
use std::borrow::Cow;

use anyhow::{Context, Result};
use rustc_hash::FxHashMap;
use rustc_hash::{FxHashMap, FxHashSet};

use ruff_diagnostics::{Diagnostic, Edit, Fix, FixAvailability, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::helpers::any_over_expr;
use ruff_python_ast::str::{leading_quote, trailing_quote};
use ruff_python_ast::{self as ast, Expr, Keyword};
use ruff_python_literal::format::{
Expand Down Expand Up @@ -56,7 +57,7 @@ impl Violation for FString {
}

/// Like [`FormatSummary`], but maps positional and keyword arguments to their
/// values. For example, given `{a} {b}".format(a=1, b=2)`, `FormatFunction`
/// values. For example, given `{a} {b}".format(a=1, b=2)`, [`FormatSummary`]
/// would include `"a"` and `'b'` in `kwargs`, mapped to `1` and `2`
/// respectively.
#[derive(Debug)]
Expand Down Expand Up @@ -106,11 +107,11 @@ impl<'a> FormatSummaryValues<'a> {
})
}

/// Return the next positional argument.
fn arg_auto(&mut self) -> Option<&Expr> {
/// Return the next positional index.
fn arg_auto(&mut self) -> usize {
let idx = self.auto_index;
self.auto_index += 1;
self.arg_positional(idx)
idx
}

/// Return the positional argument at the given index.
Expand Down Expand Up @@ -216,130 +217,178 @@ fn formatted_expr<'a>(expr: &Expr, context: FormatContext, locator: &Locator<'a>
}
}

/// Convert a string `.format` call to an f-string.
///
/// Returns `None` if the string does not require conversion, and `Err` if the conversion
/// is not possible.
fn try_convert_to_f_string(
range: TextRange,
summary: &mut FormatSummaryValues,
locator: &Locator,
) -> Result<Option<String>> {
let contents = locator.slice(range);

// Strip the unicode prefix. It's redundant in Python 3, and invalid when used
// with f-strings.
let contents = if contents.starts_with('U') || contents.starts_with('u') {
&contents[1..]
} else {
contents
};
#[derive(Debug, Clone)]
enum FStringConversion {
/// The format string only contains literal parts.
Literal,
/// The format call uses arguments with side effects which are repeated within the
/// format string. For example: `"{x} {x}".format(x=foo())`.
SideEffects,
/// The format string should be converted to an f-string.
Convert(String),
}

// Temporarily strip the raw prefix, if present. It will be prepended to the result, before the
// 'f', to match the prefix order both the Ruff formatter (and Black) use when formatting code.
let raw = contents.starts_with('R') || contents.starts_with('r');
let contents = if raw { &contents[1..] } else { contents };

// Remove the leading and trailing quotes.
let leading_quote = leading_quote(contents).context("Unable to identify leading quote")?;
let trailing_quote = trailing_quote(contents).context("Unable to identify trailing quote")?;
let contents = &contents[leading_quote.len()..contents.len() - trailing_quote.len()];
if contents.is_empty() {
return Ok(None);
}
#[derive(Debug, Clone, Eq, PartialEq, Hash)]
enum IndexOrKeyword {
/// The field uses a positional index.
Index(usize),
/// The field uses a keyword name.
Keyword(String),
}

// Parse the format string.
let format_string = FormatString::from_str(contents)?;
impl FStringConversion {
/// Convert a string `.format` call to an f-string.
fn try_convert(
range: TextRange,
summary: &mut FormatSummaryValues,
locator: &Locator,
) -> Result<Self> {
let contents = locator.slice(range);

// Strip the unicode prefix. It's redundant in Python 3, and invalid when used
// with f-strings.
let contents = if contents.starts_with('U') || contents.starts_with('u') {
&contents[1..]
} else {
contents
};

// Temporarily strip the raw prefix, if present. It will be prepended to the result, before the
// 'f', to match the prefix order both the Ruff formatter (and Black) use when formatting code.
let raw = contents.starts_with('R') || contents.starts_with('r');
let contents = if raw { &contents[1..] } else { contents };

// Remove the leading and trailing quotes.
let leading_quote = leading_quote(contents).context("Unable to identify leading quote")?;
let trailing_quote =
trailing_quote(contents).context("Unable to identify trailing quote")?;
let contents = &contents[leading_quote.len()..contents.len() - trailing_quote.len()];

// If the format string is empty, it doesn't need to be converted.
if contents.is_empty() {
return Ok(Self::Literal);
}

if format_string
.format_parts
.iter()
.all(|part| matches!(part, FormatPart::Literal(..)))
{
return Ok(None);
}
// Parse the format string.
let format_string = FormatString::from_str(contents)?;

let mut converted = String::with_capacity(contents.len());
for part in format_string.format_parts {
match part {
FormatPart::Field {
field_name,
conversion_spec,
format_spec,
} => {
converted.push('{');

let field = FieldName::parse(&field_name)?;
let arg = match field.field_type {
FieldType::Auto => summary.arg_auto(),
FieldType::Index(index) => summary.arg_positional(index),
FieldType::Keyword(name) => summary.arg_keyword(&name),
}
.context("Unable to parse field")?;
converted.push_str(&formatted_expr(
arg,
if field.parts.is_empty() {
FormatContext::Bare
} else {
FormatContext::Accessed
},
locator,
));

for part in field.parts {
match part {
FieldNamePart::Attribute(name) => {
converted.push('.');
converted.push_str(&name);
// If the format string contains only literal parts, it doesn't need to be converted.
if format_string
.format_parts
.iter()
.all(|part| matches!(part, FormatPart::Literal(..)))
{
return Ok(Self::Literal);
}

let mut converted = String::with_capacity(contents.len());
let mut seen = FxHashSet::default();
for part in format_string.format_parts {
match part {
FormatPart::Field {
field_name,
conversion_spec,
format_spec,
} => {
converted.push('{');

let field = FieldName::parse(&field_name)?;

// Map from field type to specifier.
let specifier = match field.field_type {
FieldType::Auto => IndexOrKeyword::Index(summary.arg_auto()),
FieldType::Index(index) => IndexOrKeyword::Index(index),
FieldType::Keyword(name) => IndexOrKeyword::Keyword(name),
};

let arg = match &specifier {
IndexOrKeyword::Index(index) => {
summary.arg_positional(*index).ok_or_else(|| {
anyhow::anyhow!("Positional argument {index} is missing")
})?
}
FieldNamePart::Index(index) => {
converted.push('[');
converted.push_str(index.to_string().as_str());
converted.push(']');
IndexOrKeyword::Keyword(name) => {
summary.arg_keyword(name).ok_or_else(|| {
anyhow::anyhow!("Keyword argument '{name}' is missing")
})?
}
FieldNamePart::StringIndex(index) => {
let quote = match trailing_quote {
"'" | "'''" | "\"\"\"" => '"',
"\"" => '\'',
_ => unreachable!("invalid trailing quote"),
};
converted.push('[');
converted.push(quote);
converted.push_str(&index);
converted.push(quote);
converted.push(']');
};

// If the argument contains a side effect, and it's repeated in the format
// string, we can't convert the format string to an f-string. For example,
// converting `"{x} {x}".format(x=foo())` would result in `f"{foo()} {foo()}"`,
// which would call `foo()` twice.
if !seen.insert(specifier) {
if any_over_expr(arg, &Expr::is_call_expr) {
return Ok(Self::SideEffects);
}
}
}

if let Some(conversion_spec) = conversion_spec {
converted.push('!');
converted.push(conversion_spec);
}
converted.push_str(&formatted_expr(
arg,
if field.parts.is_empty() {
FormatContext::Bare
} else {
FormatContext::Accessed
},
locator,
));

for part in field.parts {
match part {
FieldNamePart::Attribute(name) => {
converted.push('.');
converted.push_str(&name);
}
FieldNamePart::Index(index) => {
converted.push('[');
converted.push_str(index.to_string().as_str());
converted.push(']');
}
FieldNamePart::StringIndex(index) => {
let quote = match trailing_quote {
"'" | "'''" | "\"\"\"" => '"',
"\"" => '\'',
_ => unreachable!("invalid trailing quote"),
};
converted.push('[');
converted.push(quote);
converted.push_str(&index);
converted.push(quote);
converted.push(']');
}
}
}

if !format_spec.is_empty() {
converted.push(':');
converted.push_str(&format_spec);
}
if let Some(conversion_spec) = conversion_spec {
converted.push('!');
converted.push(conversion_spec);
}

converted.push('}');
}
FormatPart::Literal(value) => {
converted.push_str(&curly_escape(&value));
if !format_spec.is_empty() {
converted.push(':');
converted.push_str(&format_spec);
}

converted.push('}');
}
FormatPart::Literal(value) => {
converted.push_str(&curly_escape(&value));
}
}
}
}

// Construct the format string.
let mut contents = String::with_capacity(usize::from(raw) + 1 + converted.len());
if raw {
contents.push('r');
// Construct the format string.
let mut contents = String::with_capacity(usize::from(raw) + 1 + converted.len());
if raw {
contents.push('r');
}
contents.push('f');
contents.push_str(leading_quote);
contents.push_str(&converted);
contents.push_str(trailing_quote);
Ok(Self::Convert(contents))
}
contents.push('f');
contents.push_str(leading_quote);
contents.push_str(&converted);
contents.push_str(trailing_quote);
Ok(Some(contents))
}

/// UP032
Expand Down Expand Up @@ -389,13 +438,16 @@ pub(crate) fn f_strings(
break range.start();
}
Some((Tok::String { .. }, range)) => {
match try_convert_to_f_string(range, &mut summary, checker.locator()) {
Ok(Some(fstring)) => patches.push((range, fstring)),
match FStringConversion::try_convert(range, &mut summary, checker.locator()) {
Ok(FStringConversion::Convert(fstring)) => patches.push((range, fstring)),
// Convert escaped curly brackets e.g. `{{` to `{` in literal string parts
Ok(None) => patches.push((
Ok(FStringConversion::Literal) => patches.push((
range,
curly_unescape(checker.locator().slice(range)).to_string(),
)),
// If the format string contains side effects that would need to be repeated,
// we can't convert it to an f-string.
Ok(FStringConversion::SideEffects) => return,
// If any of the segments fail to convert, then we can't convert the entire
// expression.
Err(_) => return,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -1256,6 +1256,8 @@ UP032_0.py:254:1: UP032 [*] Use f-string instead of `format` call
253 | # The dictionary should be parenthesized.
254 | "{}".format({0: 1}())
| ^^^^^^^^^^^^^^^^^^^^^ UP032
255 |
256 | # The string shouldn't be converted, since it would require repeating the function call.
|
= help: Convert to f-string

Expand All @@ -1265,3 +1267,21 @@ UP032_0.py:254:1: UP032 [*] Use f-string instead of `format` call
253 253 | # The dictionary should be parenthesized.
254 |-"{}".format({0: 1}())
254 |+f"{({0: 1}())}"
255 255 |
256 256 | # The string shouldn't be converted, since it would require repeating the function call.
257 257 | "{x} {x}".format(x=foo())

UP032_0.py:261:1: UP032 [*] Use f-string instead of `format` call
|
260 | # The string _should_ be converted, since the function call is repeated in the arguments.
261 | "{0} {1}".format(foo(), foo())
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ UP032
|
= help: Convert to f-string

Safe fix
258 258 | "{0} {0}".format(foo())
259 259 |
260 260 | # The string _should_ be converted, since the function call is repeated in the arguments.
261 |-"{0} {1}".format(foo(), foo())
261 |+f"{foo()} {foo()}"

0 comments on commit 461cdad

Please sign in to comment.