Skip to content

Commit

Permalink
Tokenize numbers including decimal and exponent as part of one token.
Browse files Browse the repository at this point in the history
Fixes #488
  • Loading branch information
bitwiseman committed Sep 26, 2014
1 parent 03a1685 commit 3e6be0b
Show file tree
Hide file tree
Showing 4 changed files with 152 additions and 52 deletions.
87 changes: 61 additions & 26 deletions js/lib/beautify.js
Original file line number Diff line number Diff line change
Expand Up @@ -390,11 +390,11 @@

function allow_wrap_or_preserved_newline(force_linewrap) {
force_linewrap = (force_linewrap === undefined) ? false : force_linewrap;

if (output.just_added_newline()) {
return
}

if ((opt.preserve_newlines && current_token.wanted_newline) || force_linewrap) {
print_newline(false, true);
} else if (opt.wrap_line_length) {
Expand Down Expand Up @@ -1012,8 +1012,6 @@
// The conditional starts the statement if appropriate.
}

var space_before = true;
var space_after = true;
if (last_type === 'TK_RESERVED' && is_special_word(flags.last_text)) {
// "return" had a special handling in TK_WORD. Now we need to return the favor
output.space_before_token = true;
Expand All @@ -1022,7 +1020,7 @@
}

// hack for actionscript's import .*;
if (current_token.text === '*' && last_type === 'TK_DOT' && !last_last_text.match(/^\d+$/)) {
if (current_token.text === '*' && last_type === 'TK_DOT') {
print_token();
return;
}
Expand Down Expand Up @@ -1053,6 +1051,9 @@
allow_wrap_or_preserved_newline();
}

var space_before = true;
var space_after = true;

if (in_array(current_token.text, ['--', '++', '!', '~']) || (in_array(current_token.text, ['-', '+']) && (in_array(last_type, ['TK_START_BLOCK', 'TK_START_EXPR', 'TK_EQUALS', 'TK_OPERATOR']) || in_array(flags.last_text, Tokenizer.line_starters) || flags.last_text === ','))) {
// unary operators (and binary +/- pretending to be unary) special cases

Expand Down Expand Up @@ -1212,7 +1213,7 @@

this.remove_indent = function(indent_string, preindent_string) {
var splice_index = 0;

// skip empty lines
if (line_items.length === 0) {
return;
Expand Down Expand Up @@ -1262,7 +1263,7 @@
lines.push(this.current_line);
return true;
}

return false;
}

Expand Down Expand Up @@ -1297,7 +1298,7 @@
this.add_space_before_token();
this.current_line.push(printable_token);
}

this.add_space_before_token = function() {
if (this.space_before_token && this.current_line.get_item_count()) {
var last_output = this.current_line.last();
Expand Down Expand Up @@ -1372,8 +1373,7 @@
function tokenizer(input, opts, indent_string) {

var whitespace = "\n\r\t ".split('');
var wordchar = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_$'.split('');
var digits = '0123456789'.split('');
var digit = /[0-9]/;

var punct = ('+ - * / % & ++ -- = += -= *= /= %= == === != !== > < >= <= >> << >>> >>>= >>= <<= && &= | || ! ~ , : ? ^ ^= |= :: =>'
+' <%= <% %> <?= <? ?>').split(' '); // try to be a good boy and try not to break the markup language identifiers
Expand Down Expand Up @@ -1479,9 +1479,53 @@
parser_pos += 1;
}

// NOTE: because beautifier doesn't fully parse, it doesn't use acorn.isIdentifierStart.
// It just treats all identifiers and numbers and such the same.
if (acorn.isIdentifierChar(input.charCodeAt(parser_pos-1))) {
if (digit.test(c)) {
var allow_decimal = true;
var allow_e = true;
var local_digit = digit;

if (c === '0' && parser_pos < input_length && /[Xx]/.test(input.charAt(parser_pos))) {
// switch to hex number, no decimal or e, just hex digits
allow_decimal = false;
allow_e = false;
c += input.charAt(parser_pos);
parser_pos += 1;
local_digit = /[0123456789abcdefABCDEF]/
} else {
// we know this first loop will run. It keeps the logic simpler.
c = '';
parser_pos -= 1
}

// Add the digits
while (parser_pos < input_length && local_digit.test(input.charAt(parser_pos))) {
c += input.charAt(parser_pos);
parser_pos += 1;

if (allow_decimal && parser_pos < input_length && input.charAt(parser_pos) === '.') {
c += input.charAt(parser_pos);
parser_pos += 1;
allow_decimal = false;
}

if (allow_e && parser_pos < input_length && /[Ee]/.test(input.charAt(parser_pos))) {
c += input.charAt(parser_pos);
parser_pos += 1;

if (parser_pos < input_length && /[+-]/.test(input.charAt(parser_pos))) {
c += input.charAt(parser_pos);
parser_pos += 1;
}

allow_e = false;
allow_decimal = false;
}
}

return [c, 'TK_WORD'];
}

if (acorn.isIdentifierStart(input.charCodeAt(parser_pos-1))) {
if (parser_pos < input_length) {
while (acorn.isIdentifierChar(input.charCodeAt(parser_pos))) {
c += input.charAt(parser_pos);
Expand All @@ -1492,17 +1536,6 @@
}
}

// small and surprisingly unugly hack for 1E-10 representation
if (parser_pos !== input_length && c.match(/^[0-9]+[Ee]$/) && (input.charAt(parser_pos) === '-' || input.charAt(parser_pos) === '+')) {

var sign = input.charAt(parser_pos);
parser_pos += 1;

var t = tokenize_next();
c += sign + t[0];
return [c, 'TK_WORD'];
}

if (!(last_type === 'TK_DOT' ||
(last_type === 'TK_RESERVED' && in_array(last_text, ['set', 'get'])))
&& in_array(c, reserved_words)) {
Expand All @@ -1511,6 +1544,7 @@
}
return [c, 'TK_RESERVED'];
}

return [c, 'TK_WORD'];
}

Expand Down Expand Up @@ -1683,7 +1717,8 @@

if (sep === '/') {
// regexps may have modifiers /regexp/MOD , so fetch those, too
while (parser_pos < input_length && in_array(input.charAt(parser_pos), wordchar)) {
// Only the flags g, i and m are valid, but if the user supplies anything else, consume it anyway.
while (parser_pos < input_length && acorn.isIdentifierStart(input.charCodeAt(parser_pos))) {
resulting_string += input.charAt(parser_pos);
parser_pos += 1;
}
Expand All @@ -1710,7 +1745,7 @@
// https://developer.mozilla.org/En/Sharp_variables_in_JavaScript
// http://mxr.mozilla.org/mozilla-central/source/js/src/jsscan.cpp around line 1935
var sharp = '#';
if (parser_pos < input_length && in_array(input.charAt(parser_pos), digits)) {
if (parser_pos < input_length && digit.test(input.charAt(parser_pos))) {
do {
c = input.charAt(parser_pos);
sharp += c;
Expand Down
18 changes: 18 additions & 0 deletions js/test/beautify-tests.js
Original file line number Diff line number Diff line change
Expand Up @@ -1122,6 +1122,7 @@ function run_beautifier_tests(test_obj, Urlencoded, js_beautify, html_beautify,
'Test_very_long_variable_name_this_should_never_wrap\n.but_this_can\n' +
'if (wraps_can_occur && inside_an_if_block) that_is_\n.okay();\n' +
'object_literal = {\n' +
' propertx: first_token + 12345678.99999E-6,\n' +
' property: first_token_should_never_wrap + but_this_can,\n' +
' propertz: first_token_should_never_wrap + !but_this_can,\n' +
' proper: "first_token_should_never_wrap" + "but_this_can"\n' +
Expand All @@ -1134,6 +1135,7 @@ function run_beautifier_tests(test_obj, Urlencoded, js_beautify, html_beautify,
' Test_very_long_variable_name_this_should_never_wrap\n.but_this_can\n' +
' if (wraps_can_occur && inside_an_if_block) that_is_\n.okay();\n' +
' object_literal = {\n' +
' propertx: first_token + 12345678.99999E-6,\n' +
' property: first_token_should_never_wrap + but_this_can,\n' +
' propertz: first_token_should_never_wrap + !but_this_can,\n' +
' proper: "first_token_should_never_wrap" + "but_this_can"\n' +
Expand All @@ -1150,6 +1152,7 @@ function run_beautifier_tests(test_obj, Urlencoded, js_beautify, html_beautify,
'Test_very_long_variable_name_this_should_never_wrap.but_this_can\n' +
'if (wraps_can_occur && inside_an_if_block) that_is_.okay();\n' +
'object_literal = {\n' +
' propertx: first_token + 12345678.99999E-6,\n' +
' property: first_token_should_never_wrap + but_this_can,\n' +
' propertz: first_token_should_never_wrap + !but_this_can,\n' +
' proper: "first_token_should_never_wrap" + "but_this_can"\n' +
Expand All @@ -1164,6 +1167,7 @@ function run_beautifier_tests(test_obj, Urlencoded, js_beautify, html_beautify,
'Test_very_long_variable_name_this_should_never_wrap.but_this_can\n' +
'if (wraps_can_occur && inside_an_if_block) that_is_.okay();\n' +
'object_literal = {\n' +
' propertx: first_token + 12345678.99999E-6,\n' +
' property: first_token_should_never_wrap + but_this_can,\n' +
' propertz: first_token_should_never_wrap + !but_this_can,\n' +
' proper: "first_token_should_never_wrap" + "but_this_can"\n' +
Expand All @@ -1181,6 +1185,8 @@ function run_beautifier_tests(test_obj, Urlencoded, js_beautify, html_beautify,
'if (wraps_can_occur &&\n' +
' inside_an_if_block) that_is_.okay();\n' +
'object_literal = {\n' +
' propertx: first_token +\n' +
' 12345678.99999E-6,\n' +
' property: first_token_should_never_wrap +\n' +
' but_this_can,\n' +
' propertz: first_token_should_never_wrap +\n' +
Expand All @@ -1202,6 +1208,8 @@ function run_beautifier_tests(test_obj, Urlencoded, js_beautify, html_beautify,
'if (wraps_can_occur &&\n' +
' inside_an_if_block) that_is_.okay();\n' +
'object_literal = {\n' +
' propertx: first_token +\n' +
' 12345678.99999E-6,\n' +
' property: first_token_should_never_wrap +\n' +
' but_this_can,\n' +
' propertz: first_token_should_never_wrap +\n' +
Expand All @@ -1224,6 +1232,8 @@ function run_beautifier_tests(test_obj, Urlencoded, js_beautify, html_beautify,
' if (wraps_can_occur &&\n' +
' inside_an_if_block) that_is_.okay();\n' +
' object_literal = {\n' +
' propertx: first_token +\n' +
' 12345678.99999E-6,\n' +
' property: first_token_should_never_wrap +\n' +
' but_this_can,\n' +
' propertz: first_token_should_never_wrap +\n' +
Expand All @@ -1245,6 +1255,7 @@ function run_beautifier_tests(test_obj, Urlencoded, js_beautify, html_beautify,
'if (wraps_can_occur && inside_an_if_block) that_is_\n' +
' .okay();\n' +
'object_literal = {\n' +
' propertx: first_token + 12345678.99999E-6,\n' +
' property: first_token_should_never_wrap + but_this_can,\n' +
' propertz: first_token_should_never_wrap + !but_this_can,\n' +
' proper: "first_token_should_never_wrap" + "but_this_can"\n' +
Expand All @@ -1261,6 +1272,7 @@ function run_beautifier_tests(test_obj, Urlencoded, js_beautify, html_beautify,
'if (wraps_can_occur && inside_an_if_block) that_is_\n' +
' .okay();\n' +
'object_literal = {\n' +
' propertx: first_token + 12345678.99999E-6,\n' +
' property: first_token_should_never_wrap + but_this_can,\n' +
' propertz: first_token_should_never_wrap + !but_this_can,\n' +
' proper: "first_token_should_never_wrap" + "but_this_can"\n' +
Expand All @@ -1280,6 +1292,8 @@ function run_beautifier_tests(test_obj, Urlencoded, js_beautify, html_beautify,
' inside_an_if_block) that_is_\n' +
' .okay();\n' +
'object_literal = {\n' +
' propertx: first_token +\n' +
' 12345678.99999E-6,\n' +
' property: first_token_should_never_wrap +\n' +
' but_this_can,\n' +
' propertz: first_token_should_never_wrap +\n' +
Expand All @@ -1302,6 +1316,8 @@ function run_beautifier_tests(test_obj, Urlencoded, js_beautify, html_beautify,
' inside_an_if_block) that_is_\n' +
' .okay();\n' +
'object_literal = {\n' +
' propertx: first_token +\n' +
' 12345678.99999E-6,\n' +
' property: first_token_should_never_wrap +\n' +
' but_this_can,\n' +
' propertz: first_token_should_never_wrap +\n' +
Expand All @@ -1325,6 +1341,8 @@ function run_beautifier_tests(test_obj, Urlencoded, js_beautify, html_beautify,
' inside_an_if_block) that_is_\n' +
' .okay();\n' +
' object_literal = {\n' +
' propertx: first_token +\n' +
' 12345678.99999E-6,\n' +
' property: first_token_should_never_wrap +\n' +
' but_this_can,\n' +
' propertz: first_token_should_never_wrap +\n' +
Expand Down
Loading

0 comments on commit 3e6be0b

Please sign in to comment.