Skip to content

Commit

Permalink
updates for string collation
Browse files Browse the repository at this point in the history
Former-commit-id: 55b936b5943f4c9f9401601c896682eea1dfcbdf [formerly 5555f27581adeafb9cdaf937468767cdd1b69ede]
Former-commit-id: c3ddd96c19d0c8663583c6b97d9e4c82a7eb3b0c
  • Loading branch information
andrewplummer committed Feb 27, 2012
1 parent 2a810de commit 3c70e23
Show file tree
Hide file tree
Showing 7 changed files with 34 additions and 138 deletions.
91 changes: 29 additions & 62 deletions lib/core.js
Expand Up @@ -612,7 +612,6 @@
***/



// Basic array internal methods

function arrayEach(arr, fn, startIndex, loop, sparse) {
Expand Down Expand Up @@ -789,30 +788,41 @@
return result;
}

/***
 * Runs the arguments through transformArgument, then applies the optional
 * `normalize` and `toLowerCase` methods (in that order) to the result when
 * the current value is truthy and exposes them. Returns the final value.
 ***/
function normalizeAndTransform() {
  var value = transformArgument.apply(null, arguments),
      steps = ['normalize', 'toLowerCase'],
      i, name;
  for(i = 0; i < steps.length; i += 1) {
    name = steps[i];
    // Each step is skipped when the value is falsy or lacks the method.
    if(value && value[name]) {
      value = value[name]();
    }
  }
  return value;
}

var basicCollation = 'ÁÀĀȧȦȃȂȁȀǻǺaÂÃÄāĂăąåäãâáàĄÅǍǎǠǟǡǞAbBčĊċĉçČCĈcćĆÇĎDďdĒĚěĘėÈÊȄȨȩÉĖĕęeëêéèEēĔËȅȆȇfFģĢġĠğĝǦĜGǴĞgǵǧHȟĤȞĥhĬĨĩȋîIȊȉȈǐÏÎÍÌĪīǏiïíĭìİįıĮjĵǰĴJǨǩĶKkķľLĽļĻĺĹlMmǸñnǹńŅÑņŇňNŃȫȰŎōȌȎŌǭǬŐőȍȏơǪǒǑƠȪȬȭȮȯŏȱǫÕôóòÒÓÔÖoõöOpPQqȒřŘrŗŔŕŖȓȑȐRßșȘšŠşŞŝŜsSſśŚŢţTťtŤțȚǓǙǘƯưuǗŰǜUǖǕǔǚȔȕųŲűȗůŮŭŬūǛũŨȖŪúûÚÛÙùÜüVvwŵWŴXxŷŶŸȲȳÝYyýÿŹźŻżŽžzZ';
// Alphanumeric collation helpers

array['AlphanumericSortCollation'] = 'ÁÀĀȧȦȃȂȁȀǻǺaÂÃÄāĂăąåäãâáàĄÅǍǎǠǟǡǞAbBčĊċĉçČCĈcćĆÇĎDďdĒĚěĘėÈÊȄȨȩÉĖĕęeëêéèEēĔËȅȆȇfFģĢġĠğĝǦĜGǴĞgǵǧHȟĤȞĥhĬĨĩȋîIȊȉȈǐÏÎÍÌĪīǏiïíĭìİįıĮjĵǰĴJǨǩĶKkķľLĽļĻĺĹlMmǸñnǹńŅÑņŇňNŃȫȰŎōȌȎŌǭǬŐőȍȏơǪǒǑƠȪȬȭȮȯŏȱǫÕôóòÒÓÔÖoõöOpPQqȒřŘrŗŔŕŖȓȑȐRßșȘšŠşŞŝŜsSſśŚŢţTťtŤțȚǓǙǘƯưuǗŰǜUǖǕǔǚȔȕųŲűȗůŮŭŬūǛũŨȖŪúûÚÛÙùÜüVvwŵWŴXxŷŶŸȲȳÝYyýÿŹźŻżŽžzZ';

/***
 * Compares two strings character by character for use as a sort comparator.
 *
 * Both strings are first passed through getCollationReadyString. At each
 * position the characters are ranked with getCollationValue; if either
 * character is not in the collation table (-1), BOTH fall back to their
 * char codes so the two values stay on the same scale.
 *
 * @param a  First string.
 * @param b  Second string.
 * @returns  -1 if a sorts before b, 1 if after, 0 if equal. A string that
 *           is a prefix of the other sorts first.
 ***/
function collateStrings(a, b) {
  var aValue, bValue, aEnded, bEnded, index = 0;
  a = getCollationReadyString(a);
  b = getCollationReadyString(b);
  do {
    // End of string is detected by length rather than by a 0 value: the
    // first character of the collation table also has value 0, which
    // previously ended the comparison early and reported equality.
    aEnded = index >= a.length;
    bEnded = index >= b.length;
    if(aEnded || bEnded) {
      if(aEnded && bEnded) return 0;
      return aEnded ? -1 : 1;
    }
    aValue = getCollationValue(a, index);
    bValue = getCollationValue(b, index);
    if(aValue === -1 || bValue === -1) {
      aValue = a.charCodeAt(index);
      bValue = b.charCodeAt(index);
    }
    index += 1;
  } while(aValue === bValue);
  return aValue < bValue ? -1 : 1;
}

/***
 * Prepares a string for collation by removing the characters matched by
 * Array.AlphanumericSortIgnore.
 *
 * @param str  String to prepare.
 * @returns    The string with ignored characters removed, or the string
 *             unchanged when no ignore pattern is configured.
 ***/
function getCollationReadyString(str) {
  var ignore = Array['AlphanumericSortIgnore'];
  // Skip the removal when the setting is unset (null/undefined/empty) so an
  // empty setting is never handed to String#remove as a pattern.
  return ignore ? str.remove(ignore) : str;
}

// NOTE(review): this span appears to interleave two versions of the same
// helper: `getCollatedValue` (with the `collationIndex` / `basicCollation`
// lines) and `getCollationValue` (with the `array['AlphanumericSortCollation']`
// line). Braces do not balance as shown -- confirm against the real file.
function getCollatedValue(str, index) {
var char = str.charAt(index), collationIndex;
// Looks up the collation rank of the character at `index` in `str`.
function getCollationValue(str, index) {
var char = str.charAt(index);
// charAt yields '' past the end of the string, so 0 marks "no character".
if(!char) {
return 0;
} else {
// getCollatedValue path: table position, or the char code when the
// character is not in `basicCollation` (indexOf gives -1).
collationIndex = basicCollation.indexOf(char);
return collationIndex === -1 ? str.charCodeAt(index) : collationIndex;
// getCollationValue path: table position, or -1 when the character is not in
// the table. NOTE(review): the first table character also yields 0, the same
// value returned above for end of string.
return array['AlphanumericSortCollation'].indexOf(char);
}
}

/***
 * Tells whether a value is a string that exposes a `compare` member.
 * Returns the (falsy) result of object.isString when the check fails,
 * otherwise the value's `compare` property itself.
 ***/
function canCompareString(str) {
  var stringCheck = object.isString(str);
  if(!stringCheck) {
    return stringCheck;
  }
  return str.compare;
}

extend(array, false, false, {

/***
Expand Down Expand Up @@ -1709,7 +1719,7 @@
* @method sortBy(<map>, [desc] = false)
* @returns Array
* @short Sorts the array by <map>.
* @extra <map> may be a function or a string acting as a shortcut. [desc] will sort the array in descending order.
* @extra <map> may be a function or a string acting as a shortcut. [desc] will sort the array in descending order. When sorting on strings a simplified form of UTF8 collation is used, specified in %Array.AlphanumericSortCollation%. Characters to be ignored are specified in %Array.AlphanumericSortIgnore%. For more information see @array_sorting.
* @example
*
* ['world','a','new'].sortBy('length') -> ['a','new','world']
Expand All @@ -1725,8 +1735,8 @@
var aProperty, bProperty, comp;
aProperty = transformArgument(a, map, arr, [a]);
bProperty = transformArgument(b, map, arr, [b]);
if(canCompareString(aProperty) && canCompareString(bProperty)) {
comp = aProperty.compare(bProperty);
if(object.isString(aProperty) && object.isString(bProperty)) {
comp = collateStrings(aProperty, bProperty);
} else if(aProperty === bProperty) {
comp = 0;
} else {
Expand Down Expand Up @@ -2277,22 +2287,6 @@
***/
'hex': function(pad) {
return this.pad(pad || 1, false, 16);
},

/***
* @method compare(<num>)
* @returns Number
* @short Performs a numeric comparison against the number.
* @extra This method is also defined on %String% and %Date%, and is useful when performing complex sort operations where the type isn't known.
* @example
*
* (255).compare(254) -> 1;
* (245).compare(254) -> -9;
* (0).compare(0) -> 0;
*
***/
'compare': function(num) {
return this - Number(num);
}

});
Expand Down Expand Up @@ -3440,33 +3434,6 @@
return this.replace(/\{(.+?)\}/g, function(m, key) {
return hasOwnProperty(assign, key) ? assign[key] : m;
});
},

/***
* @method compare(<str>, [ignore] = false)
* @returns Number
* @short Performs a lexical (alphabetic) comparison against the string.
* @extra This method is also defined on %Number% and %Date%, and is useful when performing complex sort operations where the type isn't known. If [ignore] is %true%, will ignore any non-alphanumeric character when performing comparison. [ignore] can also be a regexp.
* @example
*
* ('a').compare('b') -> -1;
* ('b').compare('a') -> 1;
* ('a').compare('a') -> 0;
* ('a').compare('@a', true) -> 0;
*
***/
'compare': function(subject) {
var aValue, bValue, index = 0, str;
str = this.remove(Array.AlphanumericSortIgnore);
subject = subject.replace(Array.AlphanumericSortIgnore);
console.info(str, subject);
while(aValue !== 0 && bValue !== 0 && aValue === bValue) {
aValue = getCollatedValue(str, index);
bValue = getCollatedValue(subject, index);
index += 1;
}
if(aValue === bValue) return 0;
return aValue < bValue ? -1 : 1;
}

});
Expand Down
14 changes: 0 additions & 14 deletions lib/dates.js
Expand Up @@ -2129,20 +2129,6 @@
***/
'clone': function() {
return new date(this.getTime());
},

/***
* @method compare(<obj>)
* @returns Number
* @short Performs a numeric comparison against the date.
* @extra This method is also defined on %String% and %Number%, and is useful when performing complex sort operations where the type isn't known.
* @example
*
* Date.create('1 day ago').compare('today') -> -864000;
*
***/
'compare': function(obj) {
return this - createDate(arguments);
}

});
Expand Down
5 changes: 2 additions & 3 deletions unit_tests/environments/sugar/array.js
Expand Up @@ -1692,12 +1692,11 @@ test('Array', function () {
equal(arr.sortBy(), arr, 'Array#sortBy | spaces are counted');


Array.AlphanumericSortIgnore = '#(';
Array.AlphanumericSortIgnore = /[#(]/g;

arr = ['fooa','#foob','(fooc'];
equal(arr.sortBy(), arr, 'Array#sortBy | special chars can be ignored');
equal(arr.sortBy(), arr, 'Array#sortBy | special chars are ignored by default');

Array.AlphanumericSortIgnore = null;

});

7 changes: 0 additions & 7 deletions unit_tests/environments/sugar/date.js
Expand Up @@ -1906,13 +1906,6 @@ test('Date', function () {
dateEqual(d.set({ month: 1, day: 3 }), new Date(2011, 1, 3), 'Date#create | set will also not cause date traversal');


// Date#compare

equal(new Date(1999, 1, 3).compare(new Date(1999, 1, 2)), (1).day(), 'Date#compare | 1 day ago returns 1 day in ms');
equal(new Date(1999, 1, 3).compare(new Date(1999, 1, 3)), 0, 'Date#compare | 1 day ago returns 1 day in ms');
equal(new Date(1999, 1, 3).compare(new Date(1999, 1, 4)), (-1).day(), 'Date#compare | 1 day from now returns -1 day in ms');



// Number methods

Expand Down
25 changes: 0 additions & 25 deletions unit_tests/environments/sugar/number.js
Expand Up @@ -326,31 +326,6 @@ test('Number', function () {
equal((2.5).hex(4), '0002.8', 'Number#hex | padding 4 places | 2.8')


// Number#compare

equal((0).compare(0), 0, 'Number#compare | 0 is equal to 0');
equal((0).compare(-1), 1, 'Number#compare | 0 is greater than -1');
equal((0).compare(1), -1, 'Number#compare | 0 is less than 1');
equal((1).compare(1), 0, 'Number#compare | 1 is equal to 1');
equal((1).compare(2), -1, 'Number#compare | 1 is less than 2');
equal((1).compare(0), 1, 'Number#compare | 1 is greater than than 0');
equal((5).compare(15), -10, 'Number#compare | 5 is less than than 15');
equal((15).compare(5), 10, 'Number#compare | 15 is greater than than 5');

equal((0).compare('0'), 0, 'Number#compare | strings are coerced | 0 is equal to 0');
equal((0).compare('-1'), 1, 'Number#compare | strings are coerced | 0 is greater than -1');
equal((0).compare('1'), -1, 'Number#compare | strings are coerced | 0 is less than 1');
equal((1).compare('1'), 0, 'Number#compare | strings are coerced | 1 is equal to 1');
equal((1).compare('2'), -1, 'Number#compare | strings are coerced | 1 is less than 2');
equal((1).compare('0'), 1, 'Number#compare | strings are coerced | 1 is greater than than 0');
equal((5).compare('15'), -10, 'Number#compare | strings are coerced | 5 is less than than 15');
equal((15).compare('5'), 10, 'Number#compare | strings are coerced | 15 is greater than than 5');

equal((15).compare('wasabi'), NaN, 'Number#compare | cannot compare numbers to strings');
equal((15).compare({ foo: 'bar' }), NaN, 'Number#compare | cannot compare numbers to objects');
equal((15).compare(/wasabi/), NaN, 'Number#compare | cannot compare numbers to regexps');
equal((15).compare(new Date) < 0, true, 'Number#compare | Dates are implicitly converted to numbers');

// Number#isInteger

equal((15).isInteger(), true, 'Number#isInteger | 15');
Expand Down
1 change: 1 addition & 0 deletions unit_tests/environments/sugar/regexp.js
Expand Up @@ -11,6 +11,7 @@ test('RegExp', function () {
equal(RegExp.escape('?'), '\\?', 'RegExp#escape | ?');
equal(RegExp.escape('\?'), '\\?', 'RegExp#escape | one slash and ?');
equal(RegExp.escape('\\?'), '\\\\\\?', 'RegExp#escape | two slashes and ?');
equal(RegExp.escape('\\?'), '\\\\\\?', 'RegExp#escape | two slashes and ?');

r = /foobar/;
n = r.setFlags('gim');
Expand Down
29 changes: 2 additions & 27 deletions unit_tests/environments/sugar/string.js
Expand Up @@ -360,6 +360,8 @@ test('String', function () {
equal('schfifty five'.remove(/f/), 'schifty five', 'String#remove | /f/');
equal('schfifty five'.remove(/f/g), 'schity ive', 'String#remove | /f/g');
equal('schfifty five'.remove(/[a-f]/g), 'shity iv', 'String#remove | /[a-f]/');
equal('?'.remove('?'), '', 'String#remove | strings have tokens escaped');
equal('?('.remove('?('), '', 'String#remove | strings have all tokens escaped');

equal('schfifty'.insert(' five'), 'schfifty five', 'String#insert | schfifty five');
equal('dopamine'.insert('e', 3), 'dopeamine', 'String#insert | dopeamine');
Expand Down Expand Up @@ -1196,31 +1198,4 @@ test('String', function () {
equal('Hello, {empty}'.assign({ empty: '' }), 'Hello, ', 'String#assign | empty string as object');


// String#compare

equal(('a').compare('a'), 0, 'String#compare | a and a results in 0');
equal(('a').compare('b'), -1, 'String#compare | a and b results in 0');
equal(('b').compare('a'), 1, 'String#compare | b and a results in 1');
equal(('z').compare('a'), 1, 'String#compare | z and a results in 1');
equal(('a').compare('z'), -1, 'String#compare | a and z results in 0');

equal(('A').compare('a'), -1, 'String#compare | caps come before lower case');
equal(('_').compare('-'), 1, 'String#compare | special chars are also compared by code point');

equal(('advertising').compare('@advertising', true), 0, 'String#compare | all special characters can be ignored');
equal(('advertising').compare('@advertising', '@'), 0, 'String#compare | specific characters can be ignored');
equal(('advertising').compare('@advertising', '#'), 1, 'String#compare | only specific characters are ignored');

equal(('@advertising').compare('advertising', true), 0, 'String#compare | inverse ignore is also true');
equal(('@advertising').compare('advertising', '@'), 0, 'String#compare | inverse ignore is also true');

equal(('2advertising').compare('advertising', true), -1, 'String#compare | numeric characters still count');

equal(('1').compare(0), 1, 'String#compare | numbers are coerced | 0');
equal(('1').compare(1), 0, 'String#compare | numbers are coerced | 1');
equal(('1').compare(2), -1, 'String#compare | numbers are coerced | 2');
equal(('1').compare(87), -1, 'String#compare | numbers are coerced | 87');

equal(('80').compare(9), -1, 'String#compare | in lexical comparison, 80 comes before 9');

});

0 comments on commit 3c70e23

Please sign in to comment.