Skip to content

Commit

Permalink
Fix encoding canonicalization (fixes #156)
Browse files Browse the repository at this point in the history
  • Loading branch information
ashtuchkin committed Apr 7, 2018
1 parent 67836d5 commit 696be8a
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 1 deletion.
7 changes: 6 additions & 1 deletion lib/index.js
Expand Up @@ -65,7 +65,7 @@ iconv.getCodec = function getCodec(encoding) {
iconv.encodings = require("../encodings"); // Lazy load all encoding definitions.

// Canonicalize encoding name: strip all non-alphanumeric chars and appended year.
var enc = (''+encoding).toLowerCase().replace(/[^0-9a-z]|:\d{4}$/g, "");
var enc = iconv._canonicalizeEncoding(encoding);

// Traverse iconv.encodings to find actual codec.
var codecOptions = {};
Expand Down Expand Up @@ -108,6 +108,11 @@ iconv.getCodec = function getCodec(encoding) {
}
}

iconv._canonicalizeEncoding = function(encoding) {
    // Coerce to a string first so that non-string inputs (e.g. the number 955)
    // can be lower-cased like any other encoding name.
    var lowered = ('' + encoding).toLowerCase();

    // Drop a trailing ":YYYY" year suffix, plus every character outside [0-9a-z].
    // The year alternative is listed first so the whole ":1988" tail is consumed
    // as a unit before the single-character alternative can match the colon.
    return lowered.replace(/:\d{4}$|[^0-9a-z]/g, "");
};

iconv.getEncoder = function getEncoder(encoding, options) {
var codec = iconv.getCodec(encoding),
encoder = new codec.encoder(options, codec);
Expand Down
10 changes: 10 additions & 0 deletions test/main-test.js
Expand Up @@ -84,3 +84,13 @@ describe("Generic UTF8-UCS2 tests", function() {
assert.strictEqual(iconv.encode("外国人", "latin1").toString(), "???");
});
});

// Tests for iconv._canonicalizeEncoding, the name normalizer used by getCodec.
// Strict equality is used on purpose: a loose comparison would treat the number
// 955 and the string "955" as equal and so could not detect a missing
// string coercion.
describe("Canonicalize encoding function", function() {
    it("works with numbers directly", function() {
        assert.strictEqual(iconv._canonicalizeEncoding(955), "955");
    });

    it("correctly strips year and non-alpha chars", function() {
        assert.strictEqual(iconv._canonicalizeEncoding("ISO_8859-5:1988"), "iso88595");
    });
});

0 comments on commit 696be8a

Please sign in to comment.