Skip to content

Commit 4ddec5e

Browse files
committed
util: approx 48% faster latinise()
1 parent 55ff854 commit 4ddec5e

File tree

1 file changed

+54
-51
lines changed

1 file changed

+54
-51
lines changed

js/misc/util.js

Lines changed: 54 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -337,57 +337,64 @@ function fixupPCIDescription(desc) {
337337
return out.join(' ');
338338
}
339339

340-
// key: normal char, value: regex containing all chars with accents
341-
const _LATINISE_REGEX = {
340+
// key: normal char, value: array containing all chars with accents
341+
const _LATINISE_MAP = {
342342
//uppercase
343-
A: /[\xC0-\xC5\u0100\u0102\u0104]/g,
344-
AE: /\xC6/g,
345-
C: /[\xC7\u0106\u0108\u010A\u010C]/g,
346-
D: /[\xD0\u010E\u0110]/g,
347-
E: /[\xC8-\xCB\u0112\u0114\u0116\u0118\u011A]/g,
348-
G: /[\u011C\u011E\u0120\u0122]/g,
349-
H: /[\u0124\u0126]/g,
350-
I: /[\xCC-\xCF\u0128\u012A\u012C\u0130]/g,
351-
IJ: /\u0132/g,
352-
J: /[\u012E\u0134]/g,
353-
K: /\u0136/g,
354-
L: /[\u0139\u013B\u013D\u0130F\u0141]/g,
355-
N: /[\xD1\u0143\u0145\u0147\u014A]/g,
356-
O: /[\xD2-\xD6\xD8\u014C\u014E\u0150]/g,
357-
OE: /\u0152/g,
358-
R: /[\u0154\u0156\u0158]/g,
359-
S: /[\u015A\u015C\u015E\u0160]/g,
360-
T: /[\u0162\u0164\u0166]/g,
361-
U: /[\xD9-\xDC\u0168\u016A\u016C\u016E\u0170\u0172]/g,
362-
W: /\u0174/g,
363-
Y: /[\xDD\u0176\u0178]/g,
364-
Z: /[\u0179\u017B\u017D]/g,
343+
A: ['\xC0', '\xC1', '\xC2', '\xC3', '\xC4', '\xC5', '\u0100', '\u0102', '\u0104'],
344+
AE: ['\xC6'],
345+
C: ['\xC7', '\u0106', '\u0108', '\u010A', '\u010C'],
346+
D: ['\xD0', '\u010E', '\u0110'],
347+
E: ['\xC8', '\xC9', '\xCA', '\xCB', '\u0112', '\u0114', '\u0116', '\u0118', '\u011A'],
348+
G: ['\u011C', '\u011E', '\u0120', '\u0122'],
349+
H: ['\u0124', '\u0126'],
350+
I: ['\xCC', '\xCD', '\xCE', '\xCF', '\u0128', '\u012A', '\u012C', '\u0130'],
351+
IJ: ['\u0132'],
352+
J: ['\u012E', '\u0134'],
353+
K: ['\u0136'],
354+
L: ['\u0139', '\u013B', '\u013D', '\u0130F', '\u0141'],
355+
N: ['\xD1', '\u0143', '\u0145', '\u0147', '\u014A'],
356+
O: ['\xD2', '\xD3', '\xD4', '\xD5', '\xD6', '\xD8', '\u014C', '\u014E', '\u0150'],
357+
OE: ['\u0152'],
358+
R: ['\u0154', '\u0156', '\u0158'],
359+
S: ['\u015A', '\u015C', '\u015E', '\u0160'],
360+
T: ['\u0162', '\u0164', '\u0166'],
361+
U: ['\xD9', '\xDA', '\xDB', '\xDC', '\u0168', '\u016A', '\u016C', '\u016E', '\u0170', '\u0172'],
362+
W: ['\u0174'],
363+
Y: ['\xDD', '\u0176', '\u0178'],
364+
Z: ['\u0179', '\u017B', '\u017D'],
365365

366366
//lowercase
367-
a: /[\xE0-\xE5\u0101\u0103\u0105]/g,
368-
ae: /\xE6/g,
369-
c: /[\xE7\u0107\u0109\u010B\u010D]/g,
370-
d: /[\u010F\u0111]/g,
371-
e: /[\xE8-\xEB\u0113\u0115\u0117\u0119\u011B]/g,
372-
g: /[\u011D\u011F\u0121\u0123]/g,
373-
h: /[\u0125\u0127]/g,
374-
i: /[\xEC-\xEF\u0129\u012B\u012D\u0131]/g,
375-
ij: /\u0133/g,
376-
j: /[\u012F\u0135]/g,
377-
k: /[\u0137\u0138]/g,
378-
l: /[\u013A\u013C\u013E\u0140\u0142]/g,
379-
n: /[\xF1\u0144\u0146\u0148\u0149\u014B]/g,
380-
o: /[\xF2-\xF6\xF8\u014D\u014F\u0151]/g,
381-
oe: /\u0153/g,
382-
r: /[\u0155\u0157\u0159]/g,
383-
s: /[\u015B\u015D\u015F\u0161]/g,
384-
t: /[\u0163\u0165\u0167]/g,
385-
u: /[\xF9-\xFC\u0169\u016B\u016D\u016F\u0171\u0173]/g,
386-
w: /\u0175/g,
387-
y: /[\xFD\xFF\u0177]/g,
388-
z: /[\u017A\u017C\u017E]/g
367+
a: ['\xE0', '\xE1', '\xE2', '\xE3', '\xE4', '\xE5', '\u0101', '\u0103', '\u0105'],
368+
ae: ['\xE6'],
369+
c: ['\xE7', '\u0107', '\u0109', '\u010B', '\u010D'],
370+
d: ['\u010F', '\u0111'],
371+
e: ['\xE8', '\xE9', '\xEA', '\xEB', '\u0113', '\u0115', '\u0117', '\u0119', '\u011B'],
372+
g: ['\u011D', '\u011F', '\u0121', '\u0123'],
373+
h: ['\u0125', '\u0127'],
374+
i: ['\xEC', '\xED', '\xEE', '\xEF', '\u0129', '\u012B', '\u012D', '\u0131'],
375+
ij: ['\u0133'],
376+
j: ['\u012F', '\u0135'],
377+
k: ['\u0137', '\u0138'],
378+
l: ['\u013A', '\u013C', '\u013E', '\u0140', '\u0142'],
379+
n: ['\xF1', '\u0144', '\u0146', '\u0148', '\u0149', '\u014B'],
380+
o: ['\xF2', '\xF3', '\xF4', '\xF5', '\xF6', '\xF8', '\u014D', '\u014F', '\u0151'],
381+
oe: ['\u0153'],
382+
r: ['\u0155', '\u0157', '\u0159'],
383+
s: ['\u015B', '\u015D', '\u015F', '\u0161'],
384+
t: ['\u0163', '\u0165', '\u0167'],
385+
u: ['\xF9', '\xFA', '\xFB', '\xFC', '\u0169', '\u016B', '\u016D', '\u016F', '\u0171', '\u0173'],
386+
w: ['\u0175'],
387+
y: ['\xFD', '\xFF', '\u0177'],
388+
z: ['\u017A', '\u017C', '\u017E'],
389+
389390
};
390391

392+
const _LATINISE_LUT = {};
393+
// generate look up table for accent char -> latin char
394+
each(_LATINISE_MAP, (chars, latinChar) => each(chars, char => _LATINISE_LUT[char] = latinChar));
395+
396+
// generate single capture group regex containing all accent chars
397+
const _LATINISE_REGEX = new RegExp(`(${Object.keys(_LATINISE_LUT).join('|')})`, 'g');
391398

392399
/**
393400
* latinise:
@@ -396,11 +403,7 @@ const _LATINISE_REGEX = {
396403
* Returns (string): @string, replaced accented chars
397404
*/
398405
function latinise(string){
399-
//call every regex to replace chars
400-
for(var i in _LATINISE_REGEX){
401-
string = string.replace(_LATINISE_REGEX[i], i);
402-
}
403-
return string;
406+
return string.replace(_LATINISE_REGEX, (char) => _LATINISE_LUT[char]);
404407
}
405408

406409
/**

0 commit comments

Comments
 (0)