Skip to content
Browse files

Unescape hex numeric character references

  • Loading branch information...
1 parent e432b0e commit d80f5c05b3e230278e800b68c07f27b5d1cfb7e1 Andreas Karlsson committed Aug 9, 2011
Showing with 12 additions and 2 deletions.
  1. +2 −1 lib/hpricot/builder.rb
  2. +2 −1 lib/hpricot/xchar.rb
  3. +8 −0 test/test_parser.rb
View
3 lib/hpricot/builder.rb
@@ -8,7 +8,8 @@ module Hpricot
def self.uxs(str)
str.to_s.
gsub(/\&(\w+);/) { [NamedCharacters[$1] || 63].pack("U*") }. # 63 = ?? (query char)
- gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
+ gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }.
+ gsub(/\&\#x([0-9a-fA-F]+);/) { [$1.to_i(16)].pack("U*") }
end
def self.build(ele = Doc.new, assigns = {}, &blk)
View
3 lib/hpricot/xchar.rb
@@ -87,7 +87,8 @@ def xs(str)
def uxs(str)
str.to_s.
gsub(/\&\w+;/) { |x| (XChar::PREDEFINED_U[x] || 63).chr }. # 63 = ?? (query char)
- gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
+ gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }.
+ gsub(/\&\#x([0-9a-fA-F]+);/) { [$1.to_i(16)].pack("U*") }
end
end
end
View
8 test/test_parser.rb
@@ -417,6 +417,14 @@ def test_uxs_handles_numeric_values
end
end
+ def test_uxs_handles_hexadecimal_values
+ if String.method_defined? :encoding
+ assert_equal "é", Hpricot.uxs('&#xe9;')
+ else
+ assert_equal "\303\251", Hpricot.uxs('&#xe9;')
+ end
+ end
+
def test_uxs_handles_entities
if String.method_defined? :encoding
assert_equal "é", Hpricot.uxs('&eacute;')

0 comments on commit d80f5c0

Please sign in to comment.
Something went wrong with that request. Please try again.