Skip to content

Commit

Permalink
Rollup merge of rust-lang#120259 - HTGAzureX1212:HTGAzureX1212/split-…
Browse files Browse the repository at this point in the history
…diagnostics-uncommon-codepoints, r=Manishearth

Split Diagnostics for Uncommon Codepoints: Add List to Display Characters Involved

This pull request adds a list of the offending characters to the diagnostic emitted by the `uncommon_codepoints` lint, which is the first step outlined in rust-lang#120228.

Example rendered diagnostic:
```
error: identifier contains an uncommon Unicode codepoint: 'µ'
  --> $DIR/lint-uncommon-codepoints.rs:3:7
   |
LL | const µ: f64 = 0.000001;
   |       ^
   |
note: the lint level is defined here
  --> $DIR/lint-uncommon-codepoints.rs:1:9
   |
LL | #![deny(uncommon_codepoints)]
   |         ^^^^^^^^^^^^^^^^^^^
```

(Retrying rust-lang#120258.)
  • Loading branch information
fmease committed Jan 23, 2024
2 parents b0267be + da1d0c4 commit ed207ae
Show file tree
Hide file tree
Showing 8 changed files with 34 additions and 10 deletions.
8 changes: 8 additions & 0 deletions compiler/rustc_errors/src/diagnostic_impls.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,14 @@ impl IntoDiagnosticArg for char {
}
}

/// Allows a list of characters to be used as a diagnostic argument.
///
/// Every character is rendered with its `Debug` formatting (so it appears
/// quoted, and escaped wherever `Debug` escapes it), and the rendered strings
/// are wrapped in `DiagnosticArgValue::StrListSepByAnd`.
impl IntoDiagnosticArg for Vec<char> {
    fn into_diagnostic_arg(self) -> DiagnosticArgValue<'static> {
        // Owned strings are required because the formatted text does not
        // borrow from anything that outlives this call.
        let quoted = self.into_iter().map(|ch| Cow::Owned(format!("{ch:?}"))).collect();
        DiagnosticArgValue::StrListSepByAnd(quoted)
    }
}

impl IntoDiagnosticArg for Symbol {
fn into_diagnostic_arg(self) -> DiagnosticArgValue<'static> {
self.to_ident_string().into_diagnostic_arg()
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_lint/messages.ftl
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,10 @@ lint_hidden_unicode_codepoints = unicode codepoint changing visible direction of
lint_identifier_non_ascii_char = identifier contains non-ASCII characters
lint_identifier_uncommon_codepoints = identifier contains uncommon Unicode codepoints
lint_identifier_uncommon_codepoints = identifier contains {$codepoints_len ->
[one] an uncommon Unicode codepoint
*[other] uncommon Unicode codepoints
}: {$codepoints}
lint_ignored_unless_crate_specified = {$level}({$name}) is ignored unless specified at crate level
Expand Down
5 changes: 4 additions & 1 deletion compiler/rustc_lint/src/lints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1107,7 +1107,10 @@ pub struct IdentifierNonAsciiChar;

#[derive(LintDiagnostic)]
#[diag(lint_identifier_uncommon_codepoints)]
pub struct IdentifierUncommonCodepoints;
// Payload of the `lint_identifier_uncommon_codepoints` diagnostic, emitted
// for the `UNCOMMON_CODEPOINTS` lint.
pub struct IdentifierUncommonCodepoints {
// Characters of the identifier that are listed in the message
// (interpolated as `{$codepoints}`).
pub codepoints: Vec<char>,
// Number of entries in `codepoints`; the message selects its singular or
// plural wording from this value (`{$codepoints_len -> ...}`).
pub codepoints_len: usize,
}

#[derive(LintDiagnostic)]
#[diag(lint_confusable_identifier_pair)]
Expand Down
12 changes: 11 additions & 1 deletion compiler/rustc_lint/src/non_ascii_idents.rs
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,17 @@ impl EarlyLintPass for NonAsciiIdents {
if check_uncommon_codepoints
&& !symbol_str.chars().all(GeneralSecurityProfile::identifier_allowed)
{
cx.emit_span_lint(UNCOMMON_CODEPOINTS, sp, IdentifierUncommonCodepoints);
let codepoints: Vec<_> = symbol_str
.chars()
.filter(|c| !GeneralSecurityProfile::identifier_allowed(*c))
.collect();
let codepoints_len = codepoints.len();

cx.emit_span_lint(
UNCOMMON_CODEPOINTS,
sp,
IdentifierUncommonCodepoints { codepoints, codepoints_len },
);
}
}

Expand Down
2 changes: 1 addition & 1 deletion tests/ui/lexer/lex-emoji-identifiers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ fn invalid_emoji_usages() {
let wireless🛜 = "basic emoji"; //~ ERROR: identifiers cannot contain emoji
// FIXME
let key1️⃣ = "keycap sequence"; //~ ERROR: unknown start of token
//~^ WARN: identifier contains uncommon Unicode codepoints
//~^ WARN: identifier contains an uncommon Unicode codepoint
let flag🇺🇳 = "flag sequence"; //~ ERROR: identifiers cannot contain emoji
let wales🏴 = "tag sequence"; //~ ERROR: identifiers cannot contain emoji
let folded🙏🏿 = "modifier sequence"; //~ ERROR: identifiers cannot contain emoji
Expand Down
2 changes: 1 addition & 1 deletion tests/ui/lexer/lex-emoji-identifiers.stderr
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ error: identifiers cannot contain emoji: `folded🙏🏿`
LL | let folded🙏🏿 = "modifier sequence";
| ^^^^^^^^^^

warning: identifier contains uncommon Unicode codepoints
warning: identifier contains an uncommon Unicode codepoint: '\u{fe0f}'
--> $DIR/lex-emoji-identifiers.rs:6:9
|
LL | let key1️⃣ = "keycap sequence";
Expand Down
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
#![deny(uncommon_codepoints)]

const µ: f64 = 0.000001; //~ ERROR identifier contains uncommon Unicode codepoints
const µ: f64 = 0.000001; //~ ERROR identifier contains an uncommon Unicode codepoint
//~| WARNING should have an upper case name

fn dijkstra() {} //~ ERROR identifier contains uncommon Unicode codepoints
fn dijkstra() {} //~ ERROR identifier contains an uncommon Unicode codepoint

fn main() {
let ㇻㇲㇳ = "rust"; //~ ERROR identifier contains uncommon Unicode codepoints
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
error: identifier contains uncommon Unicode codepoints
error: identifier contains an uncommon Unicode codepoint: 'µ'
--> $DIR/lint-uncommon-codepoints.rs:3:7
|
LL | const µ: f64 = 0.000001;
Expand All @@ -10,13 +10,13 @@ note: the lint level is defined here
LL | #![deny(uncommon_codepoints)]
| ^^^^^^^^^^^^^^^^^^^

error: identifier contains uncommon Unicode codepoints
error: identifier contains an uncommon Unicode codepoint: 'ĳ'
--> $DIR/lint-uncommon-codepoints.rs:6:4
|
LL | fn dĳkstra() {}
| ^^^^^^^

error: identifier contains uncommon Unicode codepoints
error: identifier contains uncommon Unicode codepoints: 'ㇻ', 'ㇲ', and 'ㇳ'
--> $DIR/lint-uncommon-codepoints.rs:9:9
|
LL | let ㇻㇲㇳ = "rust";
Expand Down

0 comments on commit ed207ae

Please sign in to comment.