Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Pyupgrade: Format specifiers #1594

Merged
merged 31 commits into from
Jan 11, 2023
Merged
Show file tree
Hide file tree
Changes from 28 commits
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
672c984
Began work on the parser
colin99d Jan 2, 2023
c127480
Continued progress
colin99d Jan 2, 2023
5e686c4
Further along but broken
colin99d Jan 3, 2023
e599734
Fixed tests
colin99d Jan 5, 2023
c68a9da
Added more checks
colin99d Jan 5, 2023
b8c06e7
Added all should pass edge cases
colin99d Jan 5, 2023
6f3c4e5
Fixing small mistakes
colin99d Jan 5, 2023
37d25dd
Added fixes
colin99d Jan 5, 2023
287f90f
Replaced lazy_static with lazy
colin99d Jan 6, 2023
cb836f9
Fixed merge conflicts
colin99d Jan 6, 2023
7645bc5
Fixed typos
colin99d Jan 6, 2023
dfee10f
Fixed incorrect import
colin99d Jan 6, 2023
aa714a8
Hunting down error with: cargo run resources/test/fixtures/pyupgrade/…
colin99d Jan 7, 2023
ec15215
Merged
colin99d Jan 9, 2023
257c828
For a multiline print statement just add a check and not a fix
colin99d Jan 9, 2023
782fce3
Added fix for helper functions, and column for SDK type
colin99d Jan 9, 2023
db92e9b
Updated mod
colin99d Jan 9, 2023
5adbe05
Aded fixes
colin99d Jan 9, 2023
1fd88cd
Added negative cases, fixed one negative edge case
colin99d Jan 9, 2023
fc6af20
Handled one more negative edge case
colin99d Jan 9, 2023
7920747
Made progress in testing
colin99d Jan 9, 2023
aa0c2cb
Clippy and fmt
colin99d Jan 9, 2023
29241ef
Fixed merge conflicts
colin99d Jan 9, 2023
ef493b2
Added new tests
colin99d Jan 9, 2023
0d395a3
Fixed linters
colin99d Jan 9, 2023
76fbc56
Fixed last bug
colin99d Jan 9, 2023
5121008
Fixed clippy and docs
colin99d Jan 9, 2023
c237e4e
Merge branch 'main' into formatspecifiers
colin99d Jan 10, 2023
75ca3ba
Merge branch 'main' into formatspecifiers
charliermarsh Jan 10, 2023
4e84dd6
Use ? in lieu of match, rename to format_literals
charliermarsh Jan 11, 2023
14a01ec
Use format.rs
charliermarsh Jan 11, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -701,6 +701,7 @@ For more, see [pyupgrade](https://pypi.org/project/pyupgrade/3.2.0/) on PyPI.
| UP027 | RewriteListComprehension | Replace unpacked list comprehension with a generator expression | 🛠 |
| UP028 | RewriteYieldFrom | Replace `yield` over `for` loop with `yield from` | 🛠 |
| UP029 | UnnecessaryBuiltinImport | Unnecessary builtin import: `...` | 🛠 |
| UP030 | FormatSpecifiers | Remove specifiers from inside the string's brackets | 🛠 |

### pep8-naming (N)

Expand Down
33 changes: 33 additions & 0 deletions resources/test/fixtures/pyupgrade/UP030_0.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# These SHOULD change
"{0}" "{1}" "{2}".format(1, 2, 3)

"a {3} complicated {1} string with {0} {2}".format(
"first", "second", "third", "fourth"
)

'{0}'.format(1)

'{0:x}'.format(30)

x = '{0}'.format(1)

'''{0}\n{1}\n'''.format(1, 2)

x = "foo {0}" \
"bar {1}".format(1, 2)

("{0}").format(1)

"\N{snowman} {0}".format(1)

# These will not change because we are waiting for libcst to fix this issue:
# https://github.com/Instagram/LibCST/issues/846
print(
'foo{0}'
'bar{1}'.format(1, 2)
)

print(
'foo{0}' # ohai\n"
'bar{1}'.format(1, 2)
)
25 changes: 25 additions & 0 deletions resources/test/fixtures/pyupgrade/UP030_1.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
# These should NOT change

'{}'.format(1)


x = ('{0} {1}',)

'{0} {0}'.format(1)

'{0:<{1}}'.format(1, 4)

'{' '0}'.format(1)

f"{0}".format(a)

f"{0}".format(1)

print(f"{0}".format(1))

# I did not include the following tests because ruff does not seem to work with
# invalid python syntax (which is a good thing)

# "{0}"format(1)
# '{'.format(1)", "'}'.format(1)
# ("{0}" # {1}\n"{2}").format(1, 2, 3)
2 changes: 2 additions & 0 deletions ruff.schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1595,6 +1595,8 @@
"UP027",
"UP028",
"UP029",
"UP03",
"UP030",
"W",
"W2",
"W29",
Expand Down
3 changes: 3 additions & 0 deletions src/checkers/ast.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1917,6 +1917,9 @@ where
if self.settings.enabled.contains(&RuleCode::UP024) {
pyupgrade::rules::os_error_alias(self, expr);
}
if self.settings.enabled.contains(&RuleCode::UP030) {
pyupgrade::rules::format_specifiers(self, expr, func);
}

// flake8-print
if self.settings.enabled.contains(&RuleCode::T201)
Expand Down
19 changes: 18 additions & 1 deletion src/cst/matchers.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
use anyhow::{bail, Result};
use libcst_native::{Expr, Import, ImportFrom, Module, SmallStatement, Statement};
use libcst_native::{
Call, Expr, Expression, Import, ImportFrom, Module, SmallStatement, Statement,
};

pub fn match_module(module_text: &str) -> Result<Module> {
match libcst_native::parse_module(module_text, None) {
Expand All @@ -8,6 +10,13 @@ pub fn match_module(module_text: &str) -> Result<Module> {
}
}

/// Parse `expression_text` as a single expression and return its CST.
///
/// # Errors
///
/// Fails with "Failed to extract CST from source" when `libcst_native` cannot
/// parse the text; the underlying parser error is discarded.
pub fn match_expression(expression_text: &str) -> Result<Expression> {
    if let Ok(expression) = libcst_native::parse_expression(expression_text) {
        Ok(expression)
    } else {
        bail!("Failed to extract CST from source")
    }
}

pub fn match_expr<'a, 'b>(module: &'a mut Module<'b>) -> Result<&'a mut Expr<'b>> {
if let Some(Statement::Simple(expr)) = module.body.first_mut() {
if let Some(SmallStatement::Expr(expr)) = expr.body.first_mut() {
Expand Down Expand Up @@ -43,3 +52,11 @@ pub fn match_import_from<'a, 'b>(module: &'a mut Module<'b>) -> Result<&'a mut I
bail!("Expected Statement::Simple")
}
}

/// Borrow the [`Call`] node held by `expression`.
///
/// # Errors
///
/// Fails when `expression` is any variant other than `Expression::Call`.
pub fn match_call<'a, 'b>(expression: &'a mut Expression<'b>) -> Result<&'a mut Call<'b>> {
    if let Expression::Call(call) = expression {
        Ok(call)
    } else {
        // The message names the variant that is actually being matched.
        bail!("Expected Expression::Call")
    }
}
2 changes: 2 additions & 0 deletions src/pyupgrade/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ mod tests {
#[test_case(RuleCode::UP028, Path::new("UP028_0.py"); "UP028_0")]
#[test_case(RuleCode::UP028, Path::new("UP028_1.py"); "UP028_1")]
#[test_case(RuleCode::UP029, Path::new("UP029.py"); "UP029")]
#[test_case(RuleCode::UP030, Path::new("UP030_0.py"); "UP030_0")]
#[test_case(RuleCode::UP030, Path::new("UP030_1.py"); "UP030_1")]
fn rules(rule_code: RuleCode, path: &Path) -> Result<()> {
let snapshot = format!("{}_{}", rule_code.as_ref(), path.to_string_lossy());
let diagnostics = test_path(
Expand Down
215 changes: 215 additions & 0 deletions src/pyupgrade/rules/format_specifiers.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,215 @@
use libcst_native::{parse_expression, Arg, Codegen, CodegenState, Expression};
use num_bigint::{BigInt, Sign};
use once_cell::sync::Lazy;
use regex::Regex;
use rustpython_ast::{Constant, Expr, ExprKind};
use rustpython_parser::lexer;
use rustpython_parser::lexer::Tok;

use crate::ast::types::Range;
use crate::autofix::Fix;
use crate::checkers::ast::Checker;
use crate::cst::matchers::{match_call, match_expression};
use crate::registry::Diagnostic;
use crate::violations;

// Matches one numbered replacement field: an opening curly bracket, one or
// more digits (captured as `int`), then lazily any text other than a newline
// (captured as `fmt`), followed by a closing curly bracket
static FORMAT_SPECIFIER: Lazy<Regex> =
Lazy::new(|| Regex::new(r"\{(?P<int>\d+)(?P<fmt>.*?)\}").unwrap());

// Used to detect a nested numbered field inside the `fmt` capture of
// FORMAT_SPECIFIER. That capture stops before the first closing bracket, so
// only the opening bracket and the integer of a nested field can appear in it
static FIRST_HALF: Lazy<Regex> = Lazy::new(|| Regex::new(r"\{(\d+)").unwrap());

/// Convert a Python integer to an unsigned 32-bit integer.
///
/// Zero maps to `Some(0)` and negative values (which are not valid positional
/// indices in a format string) map to `None`. For positive values only the
/// first `u32` digit reported by `to_u32_digits` is returned.
///
/// NOTE(review): num-bigint documents the digits as least-significant first,
/// so a value above `u32::MAX` would be truncated rather than rejected. That
/// is assumed never to happen for a format-string index.
fn convert_big_int(bigint: &BigInt) -> Option<u32> {
    match bigint.to_u32_digits() {
        (Sign::NoSign, _) => Some(0),
        (Sign::Plus, digits) => digits.first().copied(),
        (Sign::Minus, _) => None,
    }
}

/// Reorder the arguments of a `.format(...)` call so that they line up with
/// auto-numbered (`{}`) fields.
///
/// `correct_order[i]` is the explicit index that the `i`-th field in the
/// string referred to, so the `i`-th new argument takes its *value* from
/// `old_args[correct_order[i]]`. The commas and whitespace are taken from
/// `old_args[i]`, so the layout of the call is preserved.
///
/// Arguments beyond `correct_order.len()` (for example keyword arguments that
/// feed named fields such as `{name}`) are carried over untouched instead of
/// being dropped.
///
/// # Errors
///
/// Returns `Err(())` when an index in `correct_order` has no matching
/// argument, or when one of the arguments that would be moved is a keyword
/// argument. Stripping the keyword would change what the call means.
fn get_new_args<'a>(old_args: &[Arg<'a>], correct_order: &'a [u32]) -> Result<Vec<Arg<'a>>, ()> {
    let mut new_args: Vec<Arg> = Vec::with_capacity(old_args.len());
    for (position, &source_idx) in correct_order.iter().enumerate() {
        // We need to keep the formatting in the same order but move the values
        let values = old_args.get(source_idx as usize).ok_or(())?;
        let formatting = old_args.get(position).ok_or(())?;
        // `.format` does accept keyword arguments (`"{a}".format(a=1)`); a
        // keyword argument can never satisfy a numbered field, so bail out
        // rather than silently turning it into a positional argument.
        if values.keyword.is_some() || formatting.keyword.is_some() {
            return Err(());
        }
        new_args.push(Arg {
            value: values.value.clone(),
            comma: formatting.comma.clone(),
            equal: None,
            keyword: None,
            star: values.star,
            whitespace_after_star: formatting.whitespace_after_star.clone(),
            whitespace_after_arg: formatting.whitespace_after_arg.clone(),
        });
    }
    // Keep everything that was not part of the reordering (unused positional
    // arguments and keyword arguments) exactly as it was written.
    new_args.extend(old_args.iter().skip(correct_order.len()).cloned());
    Ok(new_args)
}

/// Returns the source text of the rewritten call, or an error if no rewritten
/// call can be produced.
///
/// `module_text` is the source text of the call expression and
/// `correct_order` lists the explicit field indices in the order they appear
/// in the string. `Err(())` is returned when the text cannot be parsed, is
/// not a call, the arguments cannot be reordered, the callee is not an
/// attribute access, the cleaned callee cannot be re-parsed, or the generated
/// text is identical to `module_text`.
fn get_new_call(module_text: &str, correct_order: &[u32]) -> Result<String, ()> {
let mut expression = match parse_expression(module_text) {
Err(_) => return Err(()),
Ok(item) => item,
};
let mut call = match match_call(&mut expression) {
Err(_) => return Err(()),
Ok(item) => item,
};
// Reorder the call arguments to match the order of the fields in the string
call.args = match get_new_args(&call.args, correct_order) {
Err(_) => return Err(()),
Ok(item) => item,
};
// Create the new function
if let Expression::Attribute(item) = &*call.func {
// Converting the node to a string and then parsing it back is not very
// efficient, but a regex was the simplest way found to remove the specifiers
let mut state = CodegenState::default();
item.codegen(&mut state);
let cleaned = remove_specifiers(&state.to_string());
match match_expression(&cleaned) {
Err(_) => return Err(()),
Ok(item) => call.func = Box::new(item),
};
// Create the string
let mut final_state = CodegenState::default();
expression.codegen(&mut final_state);
// FOR REVIEWER: If the new and old text are identical, don't create a fix.
// Pyupgrade does not even report this case, so an error enum could be used
// here, with a dedicated variant telling the caller not to report at all
if module_text == final_state.to_string() {
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

When does this happen?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah nevermind.

return Err(());
}
return Ok(final_state.to_string());
}
// The callee was not an attribute access, so there is nothing to rewrite
Err(())
}

/// Collect the explicit field indices of a format string, in the order in
/// which they appear.
///
/// The string is run through the Python tokenizer and every integer token
/// that directly follows a `{` token is recorded. Tokens that fail to lex are
/// skipped, as are integers that `convert_big_int` rejects.
fn get_specifier_order(value_str: &str) -> Vec<u32> {
    let mut order: Vec<u32> = vec![];
    // True only while the most recent token was an opening brace.
    let mut after_l_brace = false;
    for (_, tok, _) in lexer::make_tokenizer(value_str).flatten() {
        match tok {
            Tok::Lbrace => after_l_brace = true,
            Tok::Int { value } => {
                if after_l_brace {
                    if let Some(index) = convert_big_int(&value) {
                        order.push(index);
                    }
                }
                after_l_brace = false;
            }
            _ => after_l_brace = false,
        }
    }
    order
}

/// Strip the explicit index from every numbered field while keeping whatever
/// followed it inside the brackets. Ex. "Hello {0} {1:x}" -> "Hello {} {:x}"
fn remove_specifiers(raw_specifiers: &str) -> String {
    FORMAT_SPECIFIER
        .replace_all(raw_specifiers, "{$fmt}")
        .into_owned()
}

/// Checks that the string contains at least one numbered field and that none
/// of the matches is unsafe to rewrite.
///
/// A string is rejected when:
/// * it has no numbered field at all,
/// * a match starts in the middle of an escaped brace (`"{{0}}"` is the
///   literal text `{0}`, not a field, and must not become `"{{}}"`), or
/// * a field nests another numbered field in its format spec (`"{0:<{1}}"`).
///
/// Python requires a string to use either all explicit or all automatic
/// numbering, so seeing one numbered field is enough as long as the code is
/// valid.
fn has_valid_specifiers(raw_specifiers: &str) -> bool {
    // If there is at least one match we should return a true
    let mut at_least_one = false;
    for cap in FORMAT_SPECIFIER.captures_iter(raw_specifiers) {
        at_least_one = true;
        // An odd number of `{` directly before the match means the matched
        // `{` is the second half of an escaped `{{`.
        let start = cap.get(0).map_or(0, |whole| whole.start());
        let preceding_braces = raw_specifiers[..start]
            .bytes()
            .rev()
            .take_while(|&byte| byte == b'{')
            .count();
        if preceding_braces % 2 == 1 {
            return false;
        }
        // If we have a nested format specifier we need to return a false
        if FIRST_HALF.is_match(&cap["fmt"]) {
            return false;
        }
    }
    at_least_one
}

/// Checks if the string has specifiers and that they are in the correct order
fn valid_specifiers(raw_specifiers: &str) -> bool {
if !has_valid_specifiers(raw_specifiers) {
return false;
}
let mut specifiers = get_specifier_order(raw_specifiers);
specifiers.sort_unstable();
let mut current = 0;
for item in specifiers {
if item == current {
current += 1;
} else {
return false;
}
}
true
}

/// UP030
///
/// Reports `"...".format(...)` calls on a string literal whose fields carry
/// explicit indices (`{0}`, `{1}`, ...) and, when a rewritten call can be
/// generated and fixes are enabled for the rule, attaches a fix that replaces
/// the whole call expression.
pub fn format_specifiers(checker: &mut Checker, expr: &Expr, func: &Expr) {
    // Only attribute calls on a string constant are of interest.
    let (value, attr) = match &func.node {
        ExprKind::Attribute { value, attr, .. } => (value, attr),
        _ => return,
    };
    let provided_string = match &value.node {
        ExprKind::Constant {
            value: Constant::Str(provided_string),
            ..
        } => provided_string,
        _ => return,
    };
    // The function must be a format function
    if attr != "format" {
        return;
    }
    // The curly brackets must have format specifiers inside of them
    if !valid_specifiers(provided_string) {
        return;
    }

    let specifier_order = get_specifier_order(provided_string);
    let call_range = Range::from_located(expr);
    let call_text = checker.locator.slice_source_code_range(&call_range);
    let mut diagnostic = Diagnostic::new(violations::FormatSpecifiers, Range::from_located(expr));

    // When the rewritten call cannot be produced the violation is still
    // reported, just without a fix. The only known cause is a parse error on
    // a multi-line format statement inside a function call; see
    // https://github.com/Instagram/LibCST/issues/846
    if let Ok(new_call) = get_new_call(&call_text, &specifier_order) {
        if checker.patch(diagnostic.kind.code()) {
            diagnostic.amend(Fix::replacement(
                new_call,
                expr.location,
                expr.end_location.unwrap(),
            ));
        }
    }
    checker.diagnostics.push(diagnostic);
}
2 changes: 2 additions & 0 deletions src/pyupgrade/rules/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pub use convert_named_tuple_functional_to_class::convert_named_tuple_functional_
pub use convert_typed_dict_functional_to_class::convert_typed_dict_functional_to_class;
pub use datetime_utc_alias::datetime_utc_alias;
pub use deprecated_unittest_alias::deprecated_unittest_alias;
pub use format_specifiers::format_specifiers;
pub use native_literals::native_literals;
use once_cell::sync::Lazy;
pub use open_alias::open_alias;
Expand Down Expand Up @@ -40,6 +41,7 @@ mod convert_named_tuple_functional_to_class;
mod convert_typed_dict_functional_to_class;
mod datetime_utc_alias;
mod deprecated_unittest_alias;
mod format_specifiers;
mod native_literals;
mod open_alias;
mod os_error_alias;
Expand Down