Skip to content

Commit

Permalink
Support concatenated literals in format-literals
Browse files Browse the repository at this point in the history
  • Loading branch information
charliermarsh committed Jun 9, 2023
1 parent aba073a commit 54510fb
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 60 deletions.
6 changes: 2 additions & 4 deletions crates/ruff/resources/test/fixtures/pyupgrade/UP030_0.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,6 @@

"\N{snowman} {0}".format(1)

'{' '0}'.format(1)

# These will not change because we are waiting for libcst to fix this issue:
# https://github.com/Instagram/LibCST/issues/846
print(
'foo{0}'
'bar{1}'.format(1, 2)
Expand All @@ -34,3 +30,5 @@
'foo{0}' # ohai\n"
'bar{1}'.format(1, 2)
)

'{' '0}'.format(1)
2 changes: 2 additions & 0 deletions crates/ruff/resources/test/fixtures/pyupgrade/UP030_1.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,5 @@
f"{0}".format(1)

print(f"{0}".format(1))

''.format(1)
115 changes: 67 additions & 48 deletions crates/ruff/src/rules/pyupgrade/rules/format_literals.rs
Original file line number Diff line number Diff line change
@@ -1,14 +1,14 @@
use anyhow::{anyhow, bail, Result};
use libcst_native::Arg;
use anyhow::{anyhow, Result};
use libcst_native::{Arg, Expression};
use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_parser::ast::{Expr, Ranged};

use crate::autofix::codemods::CodegenStylist;
use ruff_diagnostics::{AutofixKind, Diagnostic, Edit, Fix, Violation};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::source_code::{Locator, Stylist};

use crate::autofix::codemods::CodegenStylist;
use crate::checkers::ast::Checker;
use crate::cst::matchers::{match_attribute, match_call_mut, match_expression};
use crate::registry::AsRule;
Expand All @@ -35,29 +35,51 @@ impl Violation for FormatLiterals {
static FORMAT_SPECIFIER: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\{(?P<int>\d+)(?P<fmt>.*?)}").unwrap());

/// Returns a string without the format specifiers.
/// Ex. "Hello {0} {1}" -> "Hello {} {}"
fn remove_specifiers(raw_specifiers: &str) -> String {
FORMAT_SPECIFIER
.replace_all(raw_specifiers, "{$fmt}")
.to_string()
/// Remove the explicit positional indices from a format string.
fn remove_specifiers<'a>(value: &mut Expression<'a>, arena: &'a mut typed_arena::Arena<String>) {
match value {
Expression::SimpleString(expr) => {
expr.value = arena.alloc(
FORMAT_SPECIFIER
.replace_all(expr.value, "{$fmt}")
.to_string(),
);
}
Expression::ConcatenatedString(expr) => {
let mut stack = vec![&mut expr.left, &mut expr.right];
while let Some(string) = stack.pop() {
match string.as_mut() {
libcst_native::String::Simple(string) => {
string.value = arena.alloc(
FORMAT_SPECIFIER
.replace_all(string.value, "{$fmt}")
.to_string(),
);
}
libcst_native::String::Concatenated(string) => {
stack.push(&mut string.left);
stack.push(&mut string.right);
}
libcst_native::String::Formatted(_) => {}
}
}
}
_ => {}
}
}

/// Return the corrected argument vector.
fn generate_arguments<'a>(
old_args: &[Arg<'a>],
correct_order: &'a [usize],
) -> Result<Vec<Arg<'a>>> {
let mut new_args: Vec<Arg> = Vec::with_capacity(old_args.len());
for (idx, given) in correct_order.iter().enumerate() {
fn generate_arguments<'a>(arguments: &[Arg<'a>], order: &'a [usize]) -> Result<Vec<Arg<'a>>> {
let mut new_arguments: Vec<Arg> = Vec::with_capacity(arguments.len());
for (idx, given) in order.iter().enumerate() {
// We need to keep the formatting in the same order but move the values.
let values = old_args
let values = arguments
.get(*given)
.ok_or_else(|| anyhow!("Failed to extract argument at: {given}"))?;
let formatting = old_args
let formatting = arguments
.get(idx)
.ok_or_else(|| anyhow!("Failed to extract argument at: {idx}"))?;
let new_arg = Arg {
let argument = Arg {
value: values.value.clone(),
comma: formatting.comma.clone(),
equal: None,
Expand All @@ -66,19 +88,14 @@ fn generate_arguments<'a>(
whitespace_after_star: formatting.whitespace_after_star.clone(),
whitespace_after_arg: formatting.whitespace_after_arg.clone(),
};
new_args.push(new_arg);
new_arguments.push(argument);
}
Ok(new_args)
Ok(new_arguments)
}

/// Returns true if the indices are sequential.
fn is_sequential(indices: &[usize]) -> bool {
for (expected, actual) in indices.iter().enumerate() {
if expected != *actual {
return false;
}
}
true
indices.iter().enumerate().all(|(idx, value)| idx == *value)
}

/// Returns the corrected function call.
Expand All @@ -88,28 +105,32 @@ fn generate_call(
locator: &Locator,
stylist: &Stylist,
) -> Result<String> {
let module_text = locator.slice(expr.range());
let mut expression = match_expression(module_text)?;
let call = match_call_mut(&mut expression)?;
let content = locator.slice(expr.range());
let parenthesized_content = format!("({content})");
let mut expression = match_expression(&parenthesized_content)?;

// Fix the call arguments.
let call = match_call_mut(&mut expression)?;
if !is_sequential(correct_order) {
call.args = generate_arguments(&call.args, correct_order)?;
}

// Fix the string itself.
let item = match_attribute(&mut call.func)?;
let mut arena = typed_arena::Arena::new();
remove_specifiers(&mut item.value, &mut arena);

let cleaned = remove_specifiers(&item.codegen_stylist(stylist));
// Remove the parentheses (first and last characters).
let mut output = expression.codegen_stylist(stylist);
output.remove(0);
output.pop();

call.func = Box::new(match_expression(&cleaned)?);

let state = expression.codegen_stylist(stylist);
if module_text == state {
// Ex) `'{' '0}'.format(1)`
bail!("Failed to generate call expression for: {module_text}")
// Ex) `'{' '0}'.format(1)`
if output == content {
return Err(anyhow!("Unable to identify format literals"));
}
Ok(state)

Ok(output)
}

/// UP030
Expand All @@ -124,23 +145,21 @@ pub(crate) fn format_literals(checker: &mut Checker, summary: &FormatSummary, ex
if !summary.autos.is_empty() {
return;
}
if !(0..summary.indices.len()).all(|index| summary.indices.contains(&index)) {
if summary.indices.is_empty() {
return;
}
if (0..summary.indices.len()).any(|index| !summary.indices.contains(&index)) {
return;
}

let mut diagnostic = Diagnostic::new(FormatLiterals, expr.range());
if checker.patch(diagnostic.kind.rule()) {
// Currently, the only issue we know of is in LibCST:
// https://github.com/Instagram/LibCST/issues/846
if let Ok(contents) =
generate_call(expr, &summary.indices, checker.locator, checker.stylist)
{
#[allow(deprecated)]
diagnostic.set_fix(Fix::unspecified(Edit::range_replacement(
contents,
diagnostic.try_set_fix(|| {
Ok(Fix::suggested(Edit::range_replacement(
generate_call(expr, &summary.indices, checker.locator, checker.stylist)?,
expr.range(),
)));
};
)))
});
}
checker.diagnostics.push(diagnostic);
}
Original file line number Diff line number Diff line change
Expand Up @@ -204,23 +204,34 @@ UP030_0.py:24:1: UP030 Use implicit references for positional format fields
26 | '{' '0}'.format(1)
| ^^^^^^^^^^^^^^^^^^ UP030
27 |
28 | # These will not change because we are waiting for libcst to fix this issue:
28 | ''.format(1)
|
= help: Remove explicit positional indices

UP030_0.py:29:5: UP030 Use implicit references for positional format fields
UP030_0.py:29:5: UP030 [*] Use implicit references for positional format fields
|
29 | # https://github.com/Instagram/LibCST/issues/846
30 | print(
31 | 'foo{0}'
29 | print(
30 | 'foo{0}'
| _____^
32 | | 'bar{1}'.format(1, 2)
31 | | 'bar{1}'.format(1, 2)
| |_________________________^ UP030
33 | )
32 | )
|
= help: Remove explicit positional indices

UP030_0.py:34:5: UP030 Use implicit references for positional format fields
Suggested fix
26 26 | ''.format(1)
27 27 |
28 28 | print(
29 |- 'foo{0}'
30 |- 'bar{1}'.format(1, 2)
29 |+ 'foo{}'
30 |+ 'bar{}'.format(1, 2)
31 31 | )
32 32 |
33 33 | print(

UP030_0.py:34:5: UP030 [*] Use implicit references for positional format fields
|
34 | print(
35 | 'foo{0}' # ohai\n"
Expand All @@ -231,4 +242,14 @@ UP030_0.py:34:5: UP030 Use implicit references for positional format fields
|
= help: Remove explicit positional indices

Suggested fix
31 31 | )
32 32 |
33 33 | print(
34 |- 'foo{0}' # ohai\n"
35 |- 'bar{1}'.format(1, 2)
34 |+ 'foo{}' # ohai\n"
35 |+ 'bar{}'.format(1, 2)
36 36 | )


0 comments on commit 54510fb

Please sign in to comment.