Permalink
Browse files

Merge pull request #106 from mark-rushakoff/html-entities

Handle numeric character references in unescapeHTML
  • Loading branch information...
rwz committed Apr 11, 2012
2 parents e66b436 + 8611795 commit a9628b12297a6c90d0bbf34574df5a6ee0be9b4a
Showing with 30 additions and 2 deletions.
  1. +21 −2 lib/underscore.string.js
  2. +9 −0 test/strings.js
View
@@ -209,8 +209,27 @@
},
unescapeHTML: function(str) {
- return (''+str).replace(/&lt;/g, '<').replace(/&gt;/g, '>')
- .replace(/&quot;/g, '"').replace(/&apos;/g, "'").replace(/&amp;/g, '&');
+ return (''+str).replace(/&([^;]+);/g, function(matched, p1) {
+ switch (p1) {
+ case 'lt': return '<';
+ case 'gt': return '>';
+ case 'quot': return '"';
+ case 'apos': return "'";
+ case 'amp': return '&';
+ }
+
+ var hexNumericEntity = p1.match(/^#x([0-9a-fA-F]+)$/);
+ if (hexNumericEntity) {
+ return String.fromCharCode(parseInt(hexNumericEntity[1], 16));
+ }
+
+ var numericEntity = p1.match(/^#(\d+)$/);
+ if (numericEntity) {
+ return String.fromCharCode(parseInt(numericEntity[1], 10));
+ }
+
+ return matched;
+ });
},
escapeRegExp: function(str){
View
@@ -266,6 +266,15 @@ $(document).ready(function() {
equals(_('&lt;div&gt;Blah &amp; &quot;blah&quot; &amp; &apos;blah&apos;&lt;/div&gt;').unescapeHTML(),
'<div>Blah & "blah" & \'blah\'</div>');
equals(_('&amp;lt;').unescapeHTML(), '&lt;');
+ equals(_('&#39;').unescapeHTML(), "'");
+ equals(_('&#0039;').unescapeHTML(), "'");
+ equals(_('&#x4a;').unescapeHTML(), "J");
+ equals(_('&#x04A;').unescapeHTML(), "J");
+ equals(_('&#X4A;').unescapeHTML(), "&#X4A;");
+ equals(_('&_#39;').unescapeHTML(), "&_#39;");
+ equals(_('&#39_;').unescapeHTML(), "&#39_;");
+ equals(_('&amp;#38;').unescapeHTML(), "&#38;");
+ equals(_('&#38;amp;').unescapeHTML(), "&amp;");
equals(_(5).unescapeHTML(), '5');
// equals(_(undefined).unescapeHTML(), '');
});

0 comments on commit a9628b1

Please sign in to comment.