Skip to content

Commit

Permalink
Split astral plane numeric entities into surrogate pairs
Browse files Browse the repository at this point in the history
Closes #97
  • Loading branch information
isaacs committed Jan 2, 2014
1 parent 2dcb936 commit 51b27e7
Show file tree
Hide file tree
Showing 2 changed files with 68 additions and 1 deletion.
57 changes: 56 additions & 1 deletion lib/sax.js
Expand Up @@ -904,7 +904,8 @@ function parseEntity (parser) {
strictFail(parser, "Invalid character entity")
return "&"+parser.entity + ";"
}
return String.fromCharCode(num)

return String.fromCodePoint(num)
}

function write (chunk) {
Expand Down Expand Up @@ -1352,4 +1353,58 @@ function write (chunk) {
return parser
}

/*! http://mths.be/fromcodepoint v0.1.0 by @mathias */

This comment has been minimized.

Copy link
@mathiasbynens

mathiasbynens Jan 2, 2014

Perhaps a more npm-ish way to do this would be:

  1. npm install string.prototype.fromcodepoint --save
  2. require('string.prototype.fromcodepoint');

Let me know if you’d like a pull request for that.

This comment has been minimized.

Copy link
@isaacs

isaacs Jan 2, 2014

Author Owner

I would, but sax is used in browsers etc. I'll make a build script for it soon.

if (!String.fromCodePoint) {
(function() {
var stringFromCharCode = String.fromCharCode;
var floor = Math.floor;
var fromCodePoint = function() {
var MAX_SIZE = 0x4000;
var codeUnits = [];
var highSurrogate;
var lowSurrogate;
var index = -1;
var length = arguments.length;
if (!length) {
return '';
}
var result = '';
while (++index < length) {
var codePoint = Number(arguments[index]);
if (
!isFinite(codePoint) || // `NaN`, `+Infinity`, or `-Infinity`
codePoint < 0 || // not a valid Unicode code point
codePoint > 0x10FFFF || // not a valid Unicode code point
floor(codePoint) != codePoint // not an integer
) {
throw RangeError('Invalid code point: ' + codePoint);

This comment has been minimized.

Copy link
@kof

kof Jun 17, 2014

Can we somehow change this behavior? It should emit an error. #116
@isaacs you need to try/catch this fn call and emit error ... just 2loc

This comment has been minimized.

Copy link
@mathiasbynens

mathiasbynens Jun 17, 2014

That would mean this is no longer a String.fromCodePoint polyfill though, so the rest of the code would have to be changed so it no longer defines this function as String.fromCodePoint. Slightly more than just 2 lines of code ;)

This comment has been minimized.

Copy link
@kof

kof Jun 17, 2014

I didn't mean to change the polyfill; I meant to try/catch the .fromCodePoint call. But just in case a native implementation would also throw this.

This comment has been minimized.

Copy link
@kof

kof Jun 17, 2014

RangeError: NaN is not a valid code point
String.fromCodePoint(undefined)

This is what I get in firefox, so fromCodePoint throws correctly? I didn't read the spec. So we need to ensure it's not undefined before making this call probably instead of try/catch.

This comment has been minimized.

Copy link
@mathiasbynens

mathiasbynens Jun 17, 2014

Oh, right, that makes sense. I was confused because the inline comment suggested otherwise.

This comment has been minimized.

Copy link
@mathiasbynens

mathiasbynens Jun 17, 2014

This is what I get in firefox, so fromCodePoint throws correctly? I didn't read the spec. So we need to ensure it's not undefined before making this call probably instead of try/catch.

This polyfill follows the spec.

}
if (codePoint <= 0xFFFF) { // BMP code point
codeUnits.push(codePoint);
} else { // Astral code point; split in surrogate halves
// http://mathiasbynens.be/notes/javascript-encoding#surrogate-formulae
codePoint -= 0x10000;
highSurrogate = (codePoint >> 10) + 0xD800;
lowSurrogate = (codePoint % 0x400) + 0xDC00;
codeUnits.push(highSurrogate, lowSurrogate);
}
if (index + 1 == length || codeUnits.length > MAX_SIZE) {
result += stringFromCharCode.apply(null, codeUnits);
codeUnits.length = 0;
}
}
return result;
};
if (Object.defineProperty) {
Object.defineProperty(String, 'fromCodePoint', {
'value': fromCodePoint,
'configurable': true,
'writable': true
});
} else {
String.fromCodePoint = fromCodePoint;
}
}());
}

})(typeof exports === "undefined" ? sax = {} : exports)
12 changes: 12 additions & 0 deletions test/emoji.js
@@ -0,0 +1,12 @@
// Split astral-plane numeric character entities into surrogate pairs.
// The fixture's text content is the hexadecimal entity &#x1f525; (U+1F525),
// which is above 0xFFFF; the expected 'text' event carries the two UTF-16
// code units \ud83d\udd25 rather than a single code unit.
require(__dirname).test
( { xml : '<a>&#x1f525;</a>'
, expect :
[ [ 'opentag', { name: 'A', attributes: {}, isSelfClosing: false } ]
, [ 'text', '\ud83d\udd25' ]
, [ 'closetag', 'A' ]
]
, strict : false
, opt : {}
}
)

0 comments on commit 51b27e7

Please sign in to comment.