Skip to content

Commit

Permalink
Merge pull request #2189 from roja-a-m/invalid-prettify-object-with-u…
Browse files Browse the repository at this point in the history
…nicode

Fix - Invalid prettification of object with unicode as key
  • Loading branch information
bitwiseman committed Aug 30, 2023
2 parents 75a9093 + 88c0ab5 commit 44b7131
Show file tree
Hide file tree
Showing 7 changed files with 80 additions and 14 deletions.
7 changes: 4 additions & 3 deletions js/src/javascript/acorn.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 6 additions & 1 deletion js/src/javascript/tokenizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -484,6 +484,9 @@ function unescape_string(s) {
matched = input_scan.match(/x([0-9A-Fa-f]{2})/g);
} else if (input_scan.peek() === 'u') {
matched = input_scan.match(/u([0-9A-Fa-f]{4})/g);
if (!matched) {
matched = input_scan.match(/u\{([0-9A-Fa-f]+)\}/g);
}
} else {
out += '\\';
if (input_scan.hasNext()) {
Expand All @@ -507,7 +510,9 @@ function unescape_string(s) {
} else if (escaped >= 0x00 && escaped < 0x20) {
// leave 0x00...0x1f escaped
out += '\\' + matched[0];
continue;
} else if (escaped > 0x10FFFF) {
// If the escaped value is above 0x10FFFF (the upper bound checked here), keep the original escape sequence and continue conversion
out += '\\' + matched[0];
} else if (escaped === 0x22 || escaped === 0x27 || escaped === 0x5c) {
// double-quote (0x22), single-quote/apostrophe (0x27), backslash (0x5c) - escape these
out += '\\' + String.fromCharCode(escaped);
Expand Down
14 changes: 11 additions & 3 deletions python/jsbeautifier/javascript/acorn.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,14 +43,20 @@
# _nonASCIIidentifierStart = re.compile("[" + _nonASCIIidentifierStartChars + "]")
# _nonASCIIidentifier = re.compile("[" + _nonASCIIidentifierStartChars + _nonASCIIidentifierChars + "]")

_unicodeEscapeOrCodePoint = six.u(r"\\u[0-9a-fA-F]{4}|\\u\{[0-9a-fA-F]+\}")

_identifierStart = (
six.u(r"(?:\\u[0-9a-fA-F]{4}|[")
six.u("(?:")
+ _unicodeEscapeOrCodePoint
+ six.u("|[")
+ _baseASCIIidentifierStartChars
+ _nonASCIIidentifierStartChars
+ six.u("])")
)
_identifierChars = (
six.u(r"(?:\\u[0-9a-fA-F]{4}|[")
six.u("(?:")
+ _unicodeEscapeOrCodePoint
+ six.u("|[")
+ _baseASCIIidentifierChars
+ _nonASCIIidentifierStartChars
+ _nonASCIIidentifierChars
Expand All @@ -61,7 +67,9 @@

identifierStart = re.compile(_identifierStart)
identifierMatch = re.compile(
six.u(r"(?:\\u[0-9a-fA-F]{4}|[")
six.u("(?:")
+ _unicodeEscapeOrCodePoint
+ six.u("|[")
+ _baseASCIIidentifierChars
+ _nonASCIIidentifierStartChars
+ _nonASCIIidentifierChars
Expand Down
6 changes: 5 additions & 1 deletion python/jsbeautifier/javascript/tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -600,6 +600,8 @@ def unescape_string(self, s):
matched = input_scan.match(re.compile(r"x([0-9A-Fa-f]{2})"))
elif input_scan.peek() == "u":
matched = input_scan.match(re.compile(r"u([0-9A-Fa-f]{4})"))
if not matched:
matched = input_scan.match(re.compile(r"u\{([0-9A-Fa-f]+)\}"))
else:
out += "\\"
if input_scan.hasNext():
Expand All @@ -620,7 +622,9 @@ def unescape_string(self, s):
elif escaped >= 0x00 and escaped < 0x20:
# leave 0x00...0x1f escaped
out += "\\" + matched.group(0)
continue
elif escaped > 0x10FFFF:
# If the escaped value is above 0x10FFFF (the upper bound checked here), keep the original escape sequence and continue conversion
out += "\\" + matched.group(0)
elif escaped == 0x22 or escaped == 0x27 or escaped == 0x5C:
# double-quote (0x22), single-quote/apostrophe (0x27), backslash (0x5C) - escape these
out += "\\" + chr(escaped)
Expand Down
27 changes: 23 additions & 4 deletions test/data/javascript/node.mustache
Original file line number Diff line number Diff line change
Expand Up @@ -453,21 +453,40 @@ function run_javascript_tests(test_obj, Urlencoded, js_beautify, html_beautify,
bt('"—"');
bt('"\\x41\\x42\\x43\\x01"', '"\\x41\\x42\\x43\\x01"');
bt('"\\u2022"', '"\\u2022"');
bt('"\\u{2022}"', '"\\u{2022}"');
bt('a = /\s+/');
// bt('a = /\\x41/','a = /A/');
bt('"\\u2022";a = /\s+/;"\\x41\\x42\\x43\\x01".match(/\\x41/);','"\\u2022";\na = /\s+/;\n"\\x41\\x42\\x43\\x01".match(/\\x41/);');
test_fragment('"\\x22\\x27",\'\\x22\\x27\',"\\x5c",\'\\x5c\',"\\xff and \\xzz","unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff \\uzzzz"', '"\\x22\\x27", \'\\x22\\x27\', "\\x5c", \'\\x5c\', "\\xff and \\xzz", "unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff \\uzzzz"');
test_fragment('"\\x41\\x42\\x01\\x43"');
test_fragment('"\\x41\\x42\\u0001\\x43"');
test_fragment('"\\x41\\x42\\u{0001}\\x43"');
test_fragment('"\\x20\\x40\\x4a"');
test_fragment('"\\xff\\x40\\x4a"');
test_fragment('"\\u0072\\u016B\\u0137\\u012B\\u0074\\u0069\\u0073"');
test_fragment('"\\u{0072}\\u{016B}\\u{110000}\\u{137}\\u012B\\x74\\u{0000069}\\u{073}"');
test_fragment('"Google Chrome est\\u00E1 actualizado."');
test_fragment(
'"\\x22\\x27",\'\\x22\\x27\',"\\x5c",\'\\x5c\',"\\xff and \\xzz","unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff \\uzzzz"',
'"\\x22\\x27", \'\\x22\\x27\', "\\x5c", \'\\x5c\', "\\xff and \\xzz", "unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff \\uzzzz"');
opts.unescape_strings = true;
test_fragment('"\\x41\\x42\\x01\\x43"', '"AB\\x01C"');
test_fragment('"\\x41\\x42\\u0001\\x43"', '"AB\\u0001C"');
test_fragment('"\\x41\\x42\\u{0001}\\x43"', '"AB\\u{0001}C"');
test_fragment('"\\x20\\x40\\x4a"', '" @J"');
test_fragment('"\\xff\\x40\\x4a"');
test_fragment('"\\u0072\\u016B\\u0137\\u012B\\u0074\\u0069\\u0073"', '"\u0072\u016B\u0137\u012B\u0074\u0069\u0073"');
test_fragment('"\\u{0072}\\u{016B}\\u{110000}\\u{137}\\u012B\\x74\\u{0000069}\\u{073}"', '"\u0072\u016B\\u{110000}\u0137\u012B\u0074\u0069\u0073"');
test_fragment('"Google Chrome est\\u00E1 actualizado."', '"Google Chrome está actualizado."');
test_fragment('"\\x22\\x27",\'\\x22\\x27\',"\\x5c",\'\\x5c\',"\\xff and \\xzz","unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff"',
'"\\"\\\'", \'\\"\\\'\', "\\\\", \'\\\\\', "\\xff and \\xzz", "unicode \\u0000 \\" \\\' \\\\ ' + unicode_char(0xffff) + '"');
test_fragment(
'"\\x22\\x27",\'\\x22\\x27\',"\\x5c",\'\\x5c\',"\\xff and \\xzz","unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff"',
'"\\"\\\'", \'\\"\\\'\', "\\\\", \'\\\\\', "\\xff and \\xzz", "unicode \\u0000 \\" \\\' \\\\ ' + unicode_char(0xffff) + '"');
// For error case, return the string unchanged
test_fragment('"\\x22\\x27",\'\\x22\\x27\',"\\x5c",\'\\x5c\',"\\xff and \\xzz","unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff \\uzzzz"',
test_fragment(
'"\\x22\\x27",\'\\x22\\x27\',"\\x5c",\'\\x5c\',"\\xff and \\xzz","unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff \\uzzzz"',
'"\\"\\\'", \'\\"\\\'\', "\\\\", \'\\\\\', "\\xff and \\xzz", "unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff \\uzzzz"');
reset_options();
Expand Down
19 changes: 17 additions & 2 deletions test/data/javascript/python.mustache
Original file line number Diff line number Diff line change
Expand Up @@ -78,17 +78,32 @@ class TestJSBeautifier(unittest.TestCase):
bt('"—"')
bt('"\\x41\\x42\\x43\\x01"', '"\\x41\\x42\\x43\\x01"')
bt('"\\u2022"', '"\\u2022"')
bt('"\\u{2022}"', '"\\u{2022}"')
bt('a = /\s+/')
#bt('a = /\\x41/','a = /A/')
bt('"\\u2022";a = /\s+/;"\\x41\\x42\\x43\\x01".match(/\\x41/);','"\\u2022";\na = /\s+/;\n"\\x41\\x42\\x43\\x01".match(/\\x41/);')
test_fragment('"\\x22\\x27",\'\\x22\\x27\',"\\x5c",\'\\x5c\',"\\xff and \\xzz","unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff \\uzzzz"', '"\\x22\\x27", \'\\x22\\x27\', "\\x5c", \'\\x5c\', "\\xff and \\xzz", "unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff \\uzzzz"')

test_fragment('"\\x41\\x42\\x01\\x43"')
test_fragment('"\\x41\\x42\\u0001\\x43"')
test_fragment('"\\x41\\x42\\u{0001}\\x43"')
test_fragment('"\\x20\\x40\\x4a"')
test_fragment('"\\xff\\x40\\x4a"')
test_fragment('"\\u0072\\u016B\\u0137\\u012B\\u0074\\u0069\\u0073"')
test_fragment('"\\u{0072}\\u{016B}\\u{110000}\\u{137}\\u012B\\x74\\u{0000069}\\u{073}"')
test_fragment('"Google Chrome est\\u00E1 actualizado."')
test_fragment(
'"\\x22\\x27",\'\\x22\\x27\',"\\x5c",\'\\x5c\',"\\xff and \\xzz","unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff \\uzzzz"',
'"\\x22\\x27", \'\\x22\\x27\', "\\x5c", \'\\x5c\', "\\xff and \\xzz", "unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff \\uzzzz"')

self.options.unescape_strings = True

bt('"\\x41\\x42\\x43\\x01"', '"ABC\\x01"')
bt('"\\x41\\x42\\x01\\x43"', '"AB\\x01C"')
bt('"\\x41\\x42\\u0001\\x43"', '"AB\\u0001C"')
bt('"\\x41\\x42\\u{0001}\\x43"', '"AB\\u{0001}C"')
test_fragment('"\\x20\\x40\\x4a"', '" @J"')
test_fragment('"\\xff\\x40\\x4a"')
test_fragment('"\\u0072\\u016B\\u0137\\u012B\\u0074\\u0069\\u0073"', six.u('"\u0072\u016B\u0137\u012B\u0074\u0069\u0073"'))
test_fragment('"\\u{0072}\\u{016B}\\u{110000}\\u{137}\\u012B\\x74\\u{0000069}\\u{073}"', six.u('"\u0072\u016B\\u{110000}\u0137\u012B\u0074\u0069\u0073"'))

bt('a = /\s+/')
test_fragment('"\\x22\\x27",\'\\x22\\x27\',"\\x5c",\'\\x5c\',"\\xff","unicode \\u0000 \\u0022 \\u0027 \\u005c \\uffff"',
Expand Down
14 changes: 14 additions & 0 deletions test/data/javascript/tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -137,6 +137,20 @@ exports.test_data = {
}, {
input_: "var' + unicode_char(160) + unicode_char(3232) + '_' + unicode_char(3232) + ' = \"hi\";",
output: "var ' + unicode_char(3232) + '_' + unicode_char(3232) + ' = \"hi\";"
}, {
comment: 'Issue #2159: Invalid prettification of object with unicode escape character as object key - test scenario: object with unicode as key',
input: '{\\\\u{1d4b6}:"ascr"}',
output: [
'{',
' \\\\u{1d4b6}: "ascr"',
'}'
]
}, {
unchanged: [
"var \\\\u{E4}\\\\u{ca0}\\\\u{0cA0}\\\\u{000000Ca0} = {",
" \\\\u{ca0}rgerlich: true",
"};"
]
}]
}, {
name: "Test template and continuation strings",
Expand Down

0 comments on commit 44b7131

Please sign in to comment.