Skip to content

Commit

Permalink
bpo-39337: encodings.normalize_encoding() now ignores non-ASCII chara…
Browse files Browse the repository at this point in the history
…cters (pythonGH-22219)
  • Loading branch information
shihai1991 authored and adorilson committed Mar 11, 2021
1 parent 138b451 commit 5959a19
Show file tree
Hide file tree
Showing 4 changed files with 21 additions and 2 deletions.
5 changes: 5 additions & 0 deletions Doc/whatsnew/3.10.rst
Original file line number Diff line number Diff line change
Expand Up @@ -186,6 +186,11 @@ by :func:`curses.color_content`, :func:`curses.init_color`,
support is provided by the underlying ncurses library.
(Contributed by Jeffrey Kintscher and Hans Petter Jansson in :issue:`36982`.)

encodings
---------
:func:`encodings.normalize_encoding` now ignores non-ASCII characters.
(Contributed by Hai Shi in :issue:`39337`.)

glob
----

Expand Down
3 changes: 2 additions & 1 deletion Lib/encodings/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,8 @@ def normalize_encoding(encoding):
if c.isalnum() or c == '.':
if punct and chars:
chars.append('_')
chars.append(c)
if c.isascii():
chars.append(c)
punct = False
else:
punct = True
Expand Down
14 changes: 13 additions & 1 deletion Lib/test/test_codecs.py
Original file line number Diff line number Diff line change
Expand Up @@ -3417,7 +3417,7 @@ def test_rot13_func(self):

class CodecNameNormalizationTest(unittest.TestCase):
"""Test codec name normalization"""
def test_normalized_encoding(self):
def test_codecs_lookup(self):
FOUND = (1, 2, 3, 4)
NOT_FOUND = (None, None, None, None)
def search_function(encoding):
Expand All @@ -3439,6 +3439,18 @@ def search_function(encoding):
self.assertEqual(NOT_FOUND, codecs.lookup('BBB.8'))
self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))

def test_encodings_normalize_encoding(self):
# encodings.normalize_encoding() ignores non-ASCII characters.
normalize = encodings.normalize_encoding
self.assertEqual(normalize('utf_8'), 'utf_8')
self.assertEqual(normalize('utf\xE9\u20AC\U0010ffff-8'), 'utf_8')
self.assertEqual(normalize('utf 8'), 'utf_8')
# encodings.normalize_encoding() doesn't convert
# characters to lower case.
self.assertEqual(normalize('UTF 8'), 'UTF_8')
self.assertEqual(normalize('utf.8'), 'utf.8')
self.assertEqual(normalize('utf...8'), 'utf...8')


if __name__ == "__main__":
unittest.main()
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
:func:`encodings.normalize_encoding` now ignores non-ASCII characters.

0 comments on commit 5959a19

Please sign in to comment.