Skip to content

Commit

Permalink
UnicodeInCode: ban using Unicode characters outside literals and comm…
Browse files Browse the repository at this point in the history
…ents.

This turns out to be amazingly rare.

PiperOrigin-RevId: 408381304
  • Loading branch information
graememorgan authored and Error Prone Team committed Nov 9, 2021
1 parent f68b627 commit 62350ef
Show file tree
Hide file tree
Showing 4 changed files with 192 additions and 0 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
/*
* Copyright 2021 The Error Prone Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.errorprone.bugpatterns;

import static com.google.common.collect.ImmutableList.toImmutableList;
import static com.google.common.collect.Streams.concat;
import static com.google.errorprone.BugPattern.SeverityLevel.ERROR;
import static com.google.errorprone.matchers.Description.NO_MATCH;
import static com.google.errorprone.util.ErrorProneTokens.getTokens;

import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableRangeSet;
import com.google.common.collect.Range;
import com.google.errorprone.BugPattern;
import com.google.errorprone.VisitorState;
import com.google.errorprone.bugpatterns.BugChecker.CompilationUnitTreeMatcher;
import com.google.errorprone.fixes.FixedPosition;
import com.google.errorprone.matchers.Description;
import com.google.errorprone.util.ErrorProneToken;
import com.sun.source.tree.CompilationUnitTree;
import com.sun.tools.javac.parser.Tokens.TokenKind;

/**
 * Bans using non-ASCII Unicode characters outside string literals and comments.
 *
 * <p>The checker walks the raw source text of the compilation unit character by character and
 * reports every character that is neither acceptable ASCII nor located inside a string literal,
 * a character literal, or a comment.
 */
@BugPattern(
    name = "UnicodeInCode",
    severity = ERROR,
    summary =
        "Avoid using non-ASCII Unicode characters outside of comments and literals, as they can be"
            + " confusing.")
public final class UnicodeInCode extends BugChecker implements CompilationUnitTreeMatcher {
  /**
   * Scans the whole source file and reports one finding per offending character.
   *
   * @param tree the compilation unit being checked; used to anchor each reported position
   * @param state the visitor state providing the source text and the reporting channel
   * @return always {@link Description#NO_MATCH}; findings are emitted through {@code
   *     state.reportMatch} so that several can be reported for a single compilation unit
   */
  @Override
  public Description matchCompilationUnit(CompilationUnitTree tree, VisitorState state) {
    ImmutableRangeSet<Integer> commentsAndLiterals = commentsAndLiterals(state);

    CharSequence sourceCode = state.getSourceCode();

    for (int i = 0; i < sourceCode.length(); ++i) {
      char c = sourceCode.charAt(i);

      if (!isAcceptableAscii(c) && !commentsAndLiterals.contains(i)) {
        state.reportMatch(describeMatch(new FixedPosition(tree, i)));
      }
    }
    return NO_MATCH;
  }

  /**
   * Returns whether {@code c} is a character that is always permitted in code: the printable
   * ASCII range (0x20 to 0x7E inclusive), or a line feed, carriage return, or tab.
   */
  private static boolean isAcceptableAscii(char c) {
    return (c >= 0x20 && c <= 0x7E) || c == '\n' || c == '\r' || c == '\t';
  }

  /**
   * Computes the set of source offsets covered by string literals, character literals, and
   * comments, i.e. the regions in which non-ASCII characters are tolerated.
   *
   * <p>Both a token's end position and {@code start + text.length()} for a comment point just
   * past the last covered character, so the ranges are half-open. Using closed ranges here would
   * also exempt the first character following each literal or comment, letting something like an
   * identifier that starts immediately after a block comment slip through.
   */
  private static ImmutableRangeSet<Integer> commentsAndLiterals(VisitorState state) {
    ImmutableList<ErrorProneToken> tokens =
        getTokens(state.getSourceCode().toString(), state.context);
    return ImmutableRangeSet.unionOf(
        concat(
                tokens.stream()
                    .filter(
                        t ->
                            t.kind().equals(TokenKind.STRINGLITERAL)
                                || t.kind().equals(TokenKind.CHARLITERAL))
                    .map(t -> Range.closedOpen(t.pos(), t.endPos())),
                tokens.stream()
                    .flatMap(t -> t.comments().stream())
                    .map(
                        c ->
                            Range.closedOpen(
                                c.getSourcePos(0), c.getSourcePos(0) + c.getText().length())))
            .collect(toImmutableList()));
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -356,6 +356,7 @@
import com.google.errorprone.bugpatterns.UndefinedEquals;
import com.google.errorprone.bugpatterns.UngroupedOverloads;
import com.google.errorprone.bugpatterns.UnicodeEscape;
import com.google.errorprone.bugpatterns.UnicodeInCode;
import com.google.errorprone.bugpatterns.UnnecessarilyFullyQualified;
import com.google.errorprone.bugpatterns.UnnecessarilyVisible;
import com.google.errorprone.bugpatterns.UnnecessaryAnonymousClass;
Expand Down Expand Up @@ -735,6 +736,7 @@ public static ScannerSupplier errorChecks() {
TruthSelfEquals.class,
TryFailThrowable.class,
TypeParameterQualifier.class,
UnicodeInCode.class,
UnnecessaryCheckNotNull.class,
UnnecessaryTypeArgument.class,
UnusedAnonymousClass.class,
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
/*
* Copyright 2021 The Error Prone Authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

package com.google.errorprone.bugpatterns;

import com.google.errorprone.CompilationTestHelper;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.JUnit4;

/** Unit tests exercising {@link UnicodeInCode}. */
@RunWith(JUnit4.class)
public final class UnicodeInCodeTest {
  private final CompilationTestHelper helper =
      CompilationTestHelper.newInstance(UnicodeInCode.class, getClass());

  /** Registers the given lines as {@code Test.java} on the helper and runs the check. */
  private void checkTestSource(String... sourceLines) {
    helper.addSourceLines("Test.java", sourceLines).doTest();
  }

  /** Plain ASCII source carries no diagnostic expectation. */
  @Test
  public void negative() {
    checkTestSource(
        "class Test {", //
        " final int noUnicodeHereBoss = 1;",
        "}");
  }

  /** Non-ASCII text inside a Javadoc comment and a line comment carries no expectation. */
  @Test
  public void negativeInComment() {
    checkTestSource(
        "/** \u03C0 */",
        "class Test {",
        " final int noUnicodeHereBoss = 1; // roughly \u03C0",
        "}");
  }

  /** Non-ASCII text inside a string literal carries no diagnostic expectation. */
  @Test
  public void negativeInStringLiteral() {
    checkTestSource(
        "class Test {", //
        " static final String pi = \"\u03C0\";",
        "}");
  }

  /** Non-ASCII text inside a character literal carries no diagnostic expectation. */
  @Test
  public void negativeInCharLiteral() {
    checkTestSource(
        "class Test {", //
        " static final char pi = '\u03C0';",
        "}");
  }

  /** A non-ASCII identifier is marked with a diagnostic expectation on the preceding line. */
  @Test
  public void positive() {
    checkTestSource(
        "class Test {",
        " // BUG: Diagnostic contains:",
        " static final double \u03C0 = 3;",
        "}");
  }
}
20 changes: 20 additions & 0 deletions docs/bugpattern/UnicodeInCode.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
Using non-ASCII Unicode characters in code can be confusing, and potentially
unsafe.

For example, homoglyphs can cause a different method to be invoked than the one
the reader expects.

```java
import static com.google.common.base.Objects.equal;

public boolean isAuthenticated(String password) {
// The "l" here is not what it seems.
return equaⅼ(password, this.password());
}

// ...

private boolean equaⅼ(String a, String b) {
return true;
}
```

0 comments on commit 62350ef

Please sign in to comment.