Skip to content

Commit

Permalink
[chromium/src PRESUBMIT] Add an inclusive language check
Browse files Browse the repository at this point in the history
This ports over the PRESUBMIT added under infra/infra
https://chromium-review.googlesource.com/c/chromium/src/+/2718892
with some tweaks to fix things discovered:
https://chromium-review.googlesource.com/c/infra/infra/+/2737384

Note that as soon as this lands, it will start running on CLs when you
execute git cl upload. The PRESUBMIT will block submission unless
the offending line includes // nocheck

Bug: 1177609
Change-Id: I6c7e86f0e43eeedba81c4ccd5c9b0206ac03d9db
Reviewed-on: https://chromium-review.googlesource.com/c/chromium/src/+/2744163
Reviewed-by: Dirk Pranke <dpranke@google.com>
Reviewed-by: Eric Foo <efoo@chromium.org>
Commit-Queue: Eric Foo <efoo@chromium.org>
Commit-Queue: Sean McCullough <seanmccullough@chromium.org>
Cr-Commit-Position: refs/heads/master@{#868250}
  • Loading branch information
seanmccullough authored and Chromium LUCI CQ committed Mar 31, 2021
1 parent dfb2fef commit eb9f9b9
Show file tree
Hide file tree
Showing 5 changed files with 1,522 additions and 0 deletions.
117 changes: 117 additions & 0 deletions PRESUBMIT.py
Original file line number Diff line number Diff line change
Expand Up @@ -5358,3 +5358,120 @@ def ModifiedPrefMigration(affected_file):
potential_problems
)]
return []


# string pattern, sequence of strings to show when pattern matches,
# error flag. True if match is a presubmit error, otherwise it's a warning.
_NON_INCLUSIVE_TERMS = (
(
# Note that \b pattern in python re is pretty particular. In this
# regexp, 'class WhiteList ...' will match, but 'class FooWhiteList
# ...' will not. This may require some tweaking to catch these cases
# without triggering a lot of false positives. Leaving it naive and
# less matchy for now.
r'/\b(?i)((black|white)list|master|slave)\b', # nocheck
(
'Please don\'t use blacklist, whitelist, ' # nocheck
'master, or slave in your', # nocheck
'code and make every effort to use other terms. Using "// nocheck"',
'"# nocheck" or "<!-- nocheck -->"',
'at the end of the offending line will bypass this PRESUBMIT error',
'but avoid using this whenever possible. Reach out to',
'community@chromium.org if you have questions'),
True),)


def _GetMessageForMatchingTerm(input_api, affected_file, line_number, line,
term, message):
"""Helper method for CheckInclusiveLanguage.
Returns an string composed of the name of the file, the line number where the
match has been found and the additional text passed as |message| in case the
target type name matches the text inside the line passed as parameter.
"""
result = []

# A // nocheck comment will bypass this error.
if line.endswith(" nocheck") or line.endswith("<!-- nocheck -->"):
return result

# Ignore C-style single-line comments about banned terms.
if input_api.re.search(r"//.*$", line):
line = input_api.re.sub(r"//.*$", "", line)

# Ignore lines from C-style multi-line comments.
if input_api.re.search(r"^\s*\*", line):
return result

# Ignore Python-style comments about banned terms.
# This actually removes comment text from the first # on.
if input_api.re.search(r"#.*$", line):
line = input_api.re.sub(r"#.*$", "", line)

matched = False
if term[0:1] == '/':
regex = term[1:]
if input_api.re.search(regex, line):
matched = True
elif term in line:
matched = True

if matched:
result.append(' %s:%d:' % (affected_file.LocalPath(), line_number))
for message_line in message:
result.append(' %s' % message_line)

return result


def CheckInclusiveLanguage(input_api, output_api):
"""Make sure that banned non-inclusive terms are not used."""
warnings = []
errors = []

# Note that this matches exact path prefixes, and does not match
# subdirectories. Only files directly in an exlcluded path will
# match.
def IsExcludedFile(affected_file, excluded_paths):
local_dir = input_api.os_path.dirname(affected_file.LocalPath())

return local_dir in excluded_paths

def CheckForMatch(affected_file, line_num, line, term, message, error):
problems = _GetMessageForMatchingTerm(input_api, affected_file, line_num,
line, term, message)

if problems:
if error:
errors.extend(problems)
else:
warnings.extend(problems)

excluded_paths = []
f = input_api.ReadFile(
input_api.os_path.join(input_api.change.RepositoryRoot(), 'infra',
'inclusive_language_presubmit_exempt_dirs.txt'))
for line in f.split('\n'):
path = line.split(' ')[0]
if len(path) > 0:
excluded_paths.append(path)

excluded_paths = set(excluded_paths)
for f in input_api.AffectedFiles():
for line_num, line in f.ChangedContents():
for term, message, error in _NON_INCLUSIVE_TERMS:
if IsExcludedFile(f, excluded_paths):
continue
CheckForMatch(f, line_num, line, term, message, error)

result = []
if (warnings):
result.append(
output_api.PresubmitPromptWarning(
'Banned non-inclusive language was used.\n' + '\n'.join(warnings)))
if (errors):
result.append(
output_api.PresubmitError('Banned non-inclusive language was used.\n' +
'\n'.join(errors)))
return result

166 changes: 166 additions & 0 deletions PRESUBMIT_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -3939,5 +3939,171 @@ def testDeletedMarkerRaisesError(self):
errors[0].message)


class InclusiveLanguageCheckTest(unittest.TestCase):
def testBlockedTerms(self):
input_api = MockInputApi()
input_api.change.RepositoryRoot = lambda: ''

input_api.files = [
MockFile('infra/inclusive_language_presubmit_exempt_dirs.txt', [
'some/dir 2 1',
'some/other/dir 2 1',
]),
MockFile('some/ios/file.mm',
['TEST(SomeClassTest, SomeInteraction, blacklist) {', # nocheck
'}']),
MockFile('some/mac/file.mm',
['TEST(SomeClassTest, SomeInteraction, BlackList) {', # nocheck
'}']),
MockFile('another/ios_file.mm',
['class SomeTest : public testing::Test blocklist {};']),
MockFile('some/ios/file_egtest.mm',
['- (void)testSomething { V(whitelist); }']), # nocheck
MockFile('some/ios/file_unittest.mm',
['TEST_F(SomeTest, Whitelist) { V(allowlist); }']), # nocheck
MockFile('some/doc/file.md',
['# Title',
'Some markdown text includes master.', # nocheck
]),
MockFile('some/doc/ok_file.md',
['# Title',
# This link contains a '//' which the matcher thinks is a
# C-style comment, and the 'master' term appears after the
# '//' in the URL, so it gets ignored as a side-effect.
'[Ignored](https://git/project.git/+/master/foo)', # nocheck
]),
MockFile('some/doc/branch_name_file.md',
['# Title',
# Matches appearing before `//` still trigger the check.
'[src/master](https://git/p.git/+/master/foo)', # nocheck
]),
MockFile('some/java/file/TestJavaDoc.java',
['/**',
' * This line contains the word master,', # nocheck
'* ignored because this is a comment. See {@link',
' * https://s/src/+/master:tools/README.md}', # nocheck
' */']),
MockFile('some/java/file/TestJava.java',
['class TestJava {',
' public String master;', # nocheck
'}']),
MockFile('some/html/file.html',
['<-- an existing html multiline comment',
'says "master" here', # nocheck
'in the comment -->'])
]

errors = PRESUBMIT.CheckInclusiveLanguage(input_api, MockOutputApi())
self.assertEqual(1, len(errors))
self.assertTrue('some/ios/file.mm' in errors[0].message)
self.assertTrue('another/ios_file.mm' not in errors[0].message)
self.assertTrue('some/mac/file.mm' in errors[0].message)
self.assertTrue('some/ios/file_egtest.mm' in errors[0].message)
self.assertTrue('some/ios/file_unittest.mm' in errors[0].message)
self.assertTrue('some/doc/file.md' in errors[0].message)
self.assertTrue('some/doc/ok_file.md' not in errors[0].message)
self.assertTrue('some/doc/branch_name_file.md' in errors[0].message)
self.assertTrue('some/java/file/TestJavaDoc.java' not in errors[0].message)
self.assertTrue('some/java/file/TestJava.java' in errors[0].message)
self.assertTrue('some/html/file.html' in errors[0].message)

def testBlockedTermsWithLegacy(self):
input_api = MockInputApi()
input_api.change.RepositoryRoot = lambda: ''

input_api.files = [
MockFile('infra/inclusive_language_presubmit_exempt_dirs.txt', [
'some/ios 2 1',
'some/other/dir 2 1',
]),
MockFile('some/ios/file.mm',
['TEST(SomeClassTest, SomeInteraction, blacklist) {', # nocheck
'}']),
MockFile('some/ios/subdir/file.mm',
['TEST(SomeClassTest, SomeInteraction, blacklist) {', # nocheck
'}']),
MockFile('some/mac/file.mm',
['TEST(SomeClassTest, SomeInteraction, BlackList) {', # nocheck
'}']),
MockFile('another/ios_file.mm',
['class SomeTest : public testing::Test blocklist {};']),
MockFile('some/ios/file_egtest.mm',
['- (void)testSomething { V(whitelist); }']), # nocheck
MockFile('some/ios/file_unittest.mm',
['TEST_F(SomeTest, Whitelist) { V(allowlist); }']), # nocheck
]

errors = PRESUBMIT.CheckInclusiveLanguage(input_api, MockOutputApi())
self.assertEqual(1, len(errors))
self.assertTrue('some/ios/file.mm' not in errors[0].message)
self.assertTrue('some/ios/subdir/file.mm' in errors[0].message)
self.assertTrue('another/ios_file.mm' not in errors[0].message)
self.assertTrue('some/mac/file.mm' in errors[0].message)
self.assertTrue('some/ios/file_egtest.mm' not in errors[0].message)
self.assertTrue('some/ios/file_unittest.mm' not in errors[0].message)

def testBlockedTermsWithNocheck(self):
input_api = MockInputApi()
input_api.change.RepositoryRoot = lambda: ''

input_api.files = [
MockFile('infra/inclusive_language_presubmit_exempt_dirs.txt', [
'some/dir 2 1',
'some/other/dir 2 1',
]),
MockFile('some/ios/file.mm',
['TEST(SomeClassTest, SomeInteraction, ',
' blacklist) { // nocheck', # nocheck
'}']),
MockFile('some/mac/file.mm',
['TEST(SomeClassTest, SomeInteraction, ',
'BlackList) { // nocheck', # nocheck
'}']),
MockFile('another/ios_file.mm',
['class SomeTest : public testing::Test blocklist {};']),
MockFile('some/ios/file_egtest.mm',
['- (void)testSomething { ',
'V(whitelist); } // nocheck']), # nocheck
MockFile('some/ios/file_unittest.mm',
['TEST_F(SomeTest, Whitelist) // nocheck', # nocheck
' { V(allowlist); }']),
MockFile('some/doc/file.md',
['Master in markdown <!-- nocheck -->', # nocheck
'## Subheading is okay']),
MockFile('some/java/file/TestJava.java',
['class TestJava {',
' public String master; // nocheck', # nocheck
'}']),
MockFile('some/html/file.html',
['<-- an existing html multiline comment',
'says "master" here --><!-- nocheck -->', # nocheck
'<!-- in the comment -->'])
]

errors = PRESUBMIT.CheckInclusiveLanguage(input_api, MockOutputApi())
self.assertEqual(0, len(errors))

def testTopLevelDirExcempt(self):
input_api = MockInputApi()
input_api.change.RepositoryRoot = lambda: ''

input_api.files = [
MockFile('infra/inclusive_language_presubmit_exempt_dirs.txt', [
'. 2 1',
'some/other/dir 2 1',
]),
MockFile('PRESUBMIT_test.py',
['TEST(SomeClassTest, SomeInteraction, blacklist) {', # nocheck
'}']),
MockFile('PRESUBMIT.py',
['- (void)testSth { V(whitelist); } // nocheck']), # nocheck
]

errors = PRESUBMIT.CheckInclusiveLanguage(input_api, MockOutputApi())
self.assertEqual(1, len(errors))
self.assertTrue('PRESUBMIT_test.py' in errors[0].message)
self.assertTrue('PRESUBMIT.py' not in errors[0].message)


if __name__ == '__main__':
unittest.main()
6 changes: 6 additions & 0 deletions infra/OWNERS
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,9 @@ ynovikov@chromium.org

# For code coverage and PGO related.
liaoyuke@chromium.org

# Specific OWNERS to review changes to the non-inclusive terms PRESUBMIT checks
per-file inclusive_language_presubmit_exempt_dirs.txt=efoo@chromium.org
per-file inclusive_language_presubmit_exempt_dirs.txt=nawei@chromium.org
per-file update_inclusive_language_presubmit_exempt_dirs.sh=efoo@chromium.org
per-file update_inclusive_language_presubmit_exempt_dirs.sh=seanmccullough@chromium.org

0 comments on commit eb9f9b9

Please sign in to comment.