Skip to content

Commit

Permalink
Run unicode prefix rule over tokens (#8709)
Browse files (browse the repository at this point in the history)
## Summary

It seems like the range of an `ExprStringLiteral` can be somewhat
unreliable when the string is part of an implicit concatenation with an
f-string. Using the tokens themselves is more reliable.

Closes #8680.
Closes #7784.
  • Loading branch information
charliermarsh committed Nov 16, 2023
1 parent 4ac78d5 commit cd29761
Show file tree
Hide file tree
Showing 6 changed files with 59 additions and 14 deletions.
3 changes: 3 additions & 0 deletions crates/ruff_linter/resources/test/fixtures/pyupgrade/UP025.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,6 @@

def hello():
return"Hello"

f"foo"u"bar"
f"foo" u"bar"
3 changes: 0 additions & 3 deletions crates/ruff_linter/src/checkers/ast/analyze/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1266,9 +1266,6 @@ pub(crate) fn expression(expr: &Expr, checker: &mut Checker) {
if checker.enabled(Rule::HardcodedTempFile) {
flake8_bandit::rules::hardcoded_tmp_directory(checker, string);
}
if checker.enabled(Rule::UnicodeKindPrefix) {
pyupgrade::rules::unicode_kind_prefix(checker, string);
}
if checker.source_type.is_stub() {
if checker.enabled(Rule::StringOrBytesTooLong) {
flake8_pyi::rules::string_or_bytes_too_long(checker, expr);
Expand Down
4 changes: 4 additions & 0 deletions crates/ruff_linter/src/checkers/tokens.rs
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,10 @@ pub(crate) fn check_tokens(
pycodestyle::rules::tab_indentation(&mut diagnostics, tokens, locator, indexer);
}

if settings.rules.enabled(Rule::UnicodeKindPrefix) {
pyupgrade::rules::unicode_kind_prefix(&mut diagnostics, tokens);
}

if settings.rules.any_enabled(&[
Rule::InvalidCharacterBackspace,
Rule::InvalidCharacterSub,
Expand Down
1 change: 1 addition & 0 deletions crates/ruff_linter/src/registry.rs
Original file line number Diff line number Diff line change
Expand Up @@ -297,6 +297,7 @@ impl Rule {
| Rule::TabIndentation
| Rule::TrailingCommaOnBareTuple
| Rule::TypeCommentInStub
| Rule::UnicodeKindPrefix
| Rule::UselessSemicolon
| Rule::UTF8EncodingDeclaration => LintSource::Tokens,
Rule::IOError => LintSource::Io,
Expand Down
29 changes: 18 additions & 11 deletions crates/ruff_linter/src/rules/pyupgrade/rules/unicode_kind_prefix.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use ruff_diagnostics::{AlwaysFixableViolation, Diagnostic, Edit, Fix};
use ruff_macros::{derive_message_formats, violation};
use ruff_python_ast::ExprStringLiteral;
use ruff_text_size::{Ranged, TextRange, TextSize};

use crate::checkers::ast::Checker;
use ruff_python_parser::lexer::LexResult;
use ruff_python_parser::{StringKind, Tok};

use ruff_text_size::{Ranged, TextRange, TextSize};

/// ## What it does
/// Checks for uses of the Unicode kind prefix (`u`) in strings.
Expand Down Expand Up @@ -39,13 +40,19 @@ impl AlwaysFixableViolation for UnicodeKindPrefix {
}

/// UP025
pub(crate) fn unicode_kind_prefix(checker: &mut Checker, string: &ExprStringLiteral) {
if string.unicode {
let mut diagnostic = Diagnostic::new(UnicodeKindPrefix, string.range);
diagnostic.set_fix(Fix::safe_edit(Edit::range_deletion(TextRange::at(
string.start(),
TextSize::from(1),
))));
checker.diagnostics.push(diagnostic);
/// Reports every string token lexed with the Unicode kind and attaches a safe
/// fix that deletes the single character at the start of that token.
///
/// Entries of `tokens` that are lexer errors are skipped. One
/// `UnicodeKindPrefix` diagnostic, spanning the whole token, is appended to
/// `diagnostics` for each match, in token order.
pub(crate) fn unicode_kind_prefix(diagnostics: &mut Vec<Diagnostic>, tokens: &[LexResult]) {
    let flagged = tokens
        .iter()
        // Flattening a `Result` yields only its `Ok` payload, so lex errors drop out here.
        .flatten()
        .filter(|(tok, _)| {
            matches!(
                tok,
                Tok::String {
                    kind: StringKind::Unicode,
                    ..
                }
            )
        })
        .map(|(_, span)| {
            // The fix removes exactly one character: the prefix at the token's start.
            let prefix = TextRange::at(span.start(), TextSize::from(1));
            let mut violation = Diagnostic::new(UnicodeKindPrefix, *span);
            violation.set_fix(Fix::safe_edit(Edit::range_deletion(prefix)));
            violation
        });

    diagnostics.extend(flagged);
}
Original file line number Diff line number Diff line change
Expand Up @@ -248,4 +248,37 @@ UP025.py:19:5: UP025 [*] Remove unicode literals from strings
21 21 | # These should not change
22 22 | u = "Hello"

UP025.py:29:7: UP025 [*] Remove unicode literals from strings
|
27 | return"Hello"
28 |
29 | f"foo"u"bar"
| ^^^^^^ UP025
30 | f"foo" u"bar"
|
= help: Remove unicode prefix

Safe fix
26 26 | def hello():
27 27 | return"Hello"
28 28 |
29 |-f"foo"u"bar"
29 |+f"foo""bar"
30 30 | f"foo" u"bar"

UP025.py:30:8: UP025 [*] Remove unicode literals from strings
|
29 | f"foo"u"bar"
30 | f"foo" u"bar"
| ^^^^^^ UP025
|
= help: Remove unicode prefix

Safe fix
27 27 | return"Hello"
28 28 |
29 29 | f"foo"u"bar"
30 |-f"foo" u"bar"
30 |+f"foo" "bar"


0 comments on commit cd29761

Please sign in to comment.