Skip to content

Commit

Permalink
Version 0.2.4: Optimized single-byte encodings (4.5x performance incr…
Browse files Browse the repository at this point in the history
…ease).
  • Loading branch information
ashtuchkin committed Aug 24, 2012
1 parent 5e166f0 commit d57c271
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 37 deletions.
8 changes: 4 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ iconv-lite - pure javascript character encoding conversion
* Pure javascript. Doesn't need native code compilation.
* Easy API.
* Works on Windows and in sandboxed environments like [Cloud9](http://c9.io).
* Encoding is faster than node-iconv, decoding slightly slower (see below for performance comparison).
* Encoding is much faster than node-iconv (see below for performance comparison).

## Usage

Expand Down Expand Up @@ -39,10 +39,10 @@ Not supported yet: Big5, EUC family, Shift_JIS.
Comparison with node-iconv module (1000x256kb, on Ubuntu 12.04, Core i5/2.5 GHz, Node v0.8.7).
Note: your results may vary, so please always check on your hardware.

operation iconv@1.1.3 iconv-lite@0.2.3
operation iconv@1.1.3 iconv-lite@0.2.4
----------------------------------------------------------
encode('win1251') ~36 Mb/s ~50 Mb/s
decode('win1251') ~38 Mb/s ~29 Mb/s
encode('win1251') ~36 Mb/s ~230 Mb/s
decode('win1251') ~38 Mb/s ~130 Mb/s


## Notes
Expand Down
71 changes: 39 additions & 32 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,13 @@ var iconv = module.exports = {
// Codepage single-byte encodings.
singlebyte: function(options) {
// Prepare chars if needed
if (!options.chars || (options.chars.length !== 128 && options.chars.length !== 256))
throw new Error("Encoding '"+options.type+"' has incorrect 'chars' (must be of len 128 or 256)");

if (options.chars.length === 128)
options.chars = asciiString + options.chars;

if (!options.charsBuf) {
if (!options.chars || (options.chars.length !== 128 && options.chars.length !== 256))
throw new Error("Encoding '"+options.type+"' has incorrect 'chars' (must be of len 128 or 256)");

if (options.chars.length === 128)
options.chars = asciiString + options.chars;

options.charsBuf = new Buffer(options.chars, 'ucs2');
}

Expand All @@ -76,33 +76,14 @@ var iconv = module.exports = {
for (var i = 0; i < options.chars.length; i++)
options.revCharsBuf[options.chars.charCodeAt(i)] = i;
}

return {
toEncoding: function(str) {
str = ensureString(str);

var buf = new Buffer(str.length);
var revCharsBuf = options.revCharsBuf;
for (var i = 0; i < str.length; i++)
buf[i] = revCharsBuf[str.charCodeAt(i)];

return buf;
},
fromEncoding: function(buf) {
buf = ensureBuffer(buf);

// Strings are immutable in JS -> we use ucs2 buffer to speed up computations.
var charsBuf = options.charsBuf;
var newBuf = new Buffer(buf.length*2);
var idx1 = 0, idx2 = 0;
for (var i = 0, _len = buf.length; i < _len; i++) {
idx1 = buf[i]*2; idx2 = i*2;
newBuf[idx2] = charsBuf[idx1];
newBuf[idx2+1] = charsBuf[idx1+1];
}
return newBuf.toString('ucs2');
}
};
// Seems that V8 is not optimizing functions if they are created again and again.
// TODO: Make same optimization for other encodings.
toEncoding: toSingleByteEncoding,
fromEncoding: fromSingleByteEncoding,
options: options,
}
},

// Codepage double-byte encodings.
Expand Down Expand Up @@ -171,6 +152,32 @@ var iconv = module.exports = {
}
};

// Encodes a string into a buffer of single-byte codepage values.
// Shared by all single-byte converters: it must be called as a method of a
// converter object, because the reverse lookup table is taken from
// this.options.revCharsBuf (indexed by UTF-16 code unit).
function toSingleByteEncoding(str) {
    var text = ensureString(str);
    var length = text.length;

    // Exactly one output byte per input code unit.
    var encoded = new Buffer(length);
    var lookup = this.options.revCharsBuf;

    var pos = 0;
    while (pos < length) {
        encoded[pos] = lookup[text.charCodeAt(pos)];
        pos++;
    }

    return encoded;
}

// Decodes a buffer of single-byte codepage values into a string.
// Shared by all single-byte converters: it must be called as a method of a
// converter object, because the forward table is taken from
// this.options.charsBuf (two bytes of 'ucs2' data per codepage value).
function fromSingleByteEncoding(buf) {
    var input = ensureBuffer(buf);

    // JS strings are immutable, so the result is assembled as raw ucs2 bytes
    // in a buffer and converted to a string once at the end.
    var table = this.options.charsBuf;
    var count = input.length;
    var decoded = new Buffer(count * 2);

    for (var src = 0, dst = 0; src < count; src++, dst += 2) {
        var offset = input[src] * 2;
        decoded[dst] = table[offset];
        decoded[dst + 1] = table[offset + 1];
    }

    return decoded.toString('ucs2');
}

// Add aliases to convert functions: iconv.encode and iconv.decode refer to
// the same function objects as iconv.toEncoding and iconv.fromEncoding.
iconv.encode = iconv.toEncoding;
iconv.decode = iconv.fromEncoding;
Expand Down
2 changes: 1 addition & 1 deletion package.json
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
{
"name": "iconv-lite",
"description": "Convert character encodings in pure javascript.",
"version": "0.2.3",
"version": "0.2.4",

"keywords": ["iconv", "convert", "charset"],
"author": "Alexander Shtuchkin <ashtuchkin@gmail.com>",
Expand Down

0 comments on commit d57c271

Please sign in to comment.