Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request rubinius#1752 from LTe/update_rexml
Update REXML (from ruby MRI)
  • Loading branch information
dbussink committed May 28, 2012
2 parents f0b98aa + 65e6ebc commit 0672a14
Show file tree
Hide file tree
Showing 33 changed files with 194 additions and 189 deletions.
4 changes: 4 additions & 0 deletions kernel/common/encoding.rb
Expand Up @@ -53,6 +53,10 @@ def initialize(from, to, options=undefined)

# Stub: the body is empty, so no conversion is performed and nil is
# returned for any +str+.
# TODO: Add implementation for this method
def convert(str)
end

# Stub: the body is empty, so +from+, +to+ and +options+ are ignored and
# nil is returned for every input; nothing is ever raised.
#
# NOTE(review): REXML's find_encoding (lib/19/rexml/encoding.rb) calls
# ::Encoding::Converter.search_convpath and rescues
# ::Encoding::ConverterNotFoundError to reject unknown encoding names.
# While this stub never raises, no name is rejected that way.
# TODO: Add implementation for this method
def self.search_convpath(from, to, options={})
end
end

def self.aliases
Expand Down
2 changes: 1 addition & 1 deletion lib/18/rexml/element.rb
Expand Up @@ -492,7 +492,7 @@ def get_text path = nil
def text=( text )
if text.kind_of? String
text = Text.new( text, whitespace(), nil, raw() )
elsif text and !text.kind_of? Text
elsif !text.nil? and !text.kind_of? Text
text = Text.new( text.to_s, whitespace(), nil, raw() )
end
old_text = get_text
Expand Down
4 changes: 2 additions & 2 deletions lib/18/rexml/encodings/SHIFT-JIS.rb
Expand Up @@ -13,8 +13,8 @@ def encode_sjis(str)
rescue LoadError
require 'nkf'

SJISTOU8 = '-Swm0'
U8TOSJIS = '-Wsm0'
SJISTOU8 = '-Swm0x'
U8TOSJIS = '-Wsm0x'

def decode_sjis(str)
NKF.nkf(SJISTOU8, str)
Expand Down
4 changes: 2 additions & 2 deletions lib/18/rexml/rexml.rb
Expand Up @@ -13,7 +13,7 @@
# Author:: Sean Russell <serATgermaneHYPHENsoftwareDOTcom>
# Version:: 3.1.7.2
# Date:: 2007/275
# Revision:: $Revision: 22842 $
# Revision:: $Revision$
#
# This API documentation can be downloaded from the REXML home page, or can
# be accessed online[http://www.germane-software.com/software/rexml_doc]
Expand All @@ -25,7 +25,7 @@ module REXML
COPYRIGHT = "Copyright \xC2\xA9 2001-2006 Sean Russell <ser@germane-software.com>"
VERSION = "3.1.7.3"
DATE = "2007/275"
REVISION = "$Revision: 22842 $".gsub(/\$Revision:|\$/,'').strip
REVISION = "$Revision$".gsub(/\$Revision:|\$/,'').strip

Copyright = COPYRIGHT
Version = VERSION
Expand Down
2 changes: 1 addition & 1 deletion lib/18/rexml/text.rb
Expand Up @@ -286,7 +286,7 @@ def Text::read_with_substitution( input, illegal=nil )
EREFERENCE = /&(?!#{Entity::NAME};)/
# Escapes all possible entities
def Text::normalize( input, doctype=nil, entity_filter=nil )
copy = input
copy = input.to_s
# Doing it like this rather than in a loop improves the speed
#copy = copy.gsub( EREFERENCE, '&amp;' )
copy = copy.gsub( "&", "&amp;" )
Expand Down
2 changes: 1 addition & 1 deletion lib/19/rexml/attribute.rb
Expand Up @@ -115,7 +115,7 @@ def to_string
# Returns the doctype of the document that owns @element.
#
# Returns nil when @element is not set, or when the element has no
# document.
def doctype
  return unless @element

  doc = @element.document
  doc.doctype if doc
end

Expand Down
2 changes: 1 addition & 1 deletion lib/19/rexml/cdata.rb
Expand Up @@ -6,7 +6,7 @@ class CData < Text
STOP = ']]>'
ILLEGAL = /(\]\]>)/

# Constructor. CData is data between <![CDATA[ ... ]]>
# Constructor. CData is data between <![CDATA[ ... ]]>
#
# _Examples_
# CData.new( source )
Expand Down
4 changes: 2 additions & 2 deletions lib/19/rexml/child.rb
Expand Up @@ -7,7 +7,7 @@ module REXML
# class directly.
class Child
include Node
attr_reader :parent # The Parent of this object
attr_reader :parent # The Parent of this object

# Constructor. Any inheritors of this class should call super to make
# sure this method is called.
Expand Down Expand Up @@ -88,7 +88,7 @@ def document

# Returns the string form of this node (the result of +to_s+).
#
# This doesn't yet handle encodings: the document's encoding is looked up
# but is not applied to the returned string.
def bytes
  # NOTE(review): the result is intentionally discarded; the call is kept
  # only so that behavior is unchanged (e.g. it still fails if there is no
  # document) -- confirm whether it can be dropped.
  document.encoding

  to_s
end
Expand Down
12 changes: 6 additions & 6 deletions lib/19/rexml/comment.rb
Expand Up @@ -38,15 +38,15 @@ def clone
# See REXML::Formatters
#
# output::
# Where to write the string
# Where to write the string
# indent::
# An integer. If -1, no indenting will be used; otherwise, the
# indentation will be this number of spaces, and children will be
# indented an additional amount.
# An integer. If -1, no indenting will be used; otherwise, the
# indentation will be this number of spaces, and children will be
# indented an additional amount.
# transitive::
# Ignored by this class. The contents of comments are never modified.
# Ignored by this class. The contents of comments are never modified.
# ie_hack::
# Needed for conformity to the child API, but not used by this class.
# Needed for conformity to the child API, but not used by this class.
def write( output, indent=-1, transitive=false, ie_hack=false )
Kernel.warn("Comment.write is deprecated. See REXML::Formatters")
indent( output, indent )
Expand Down
11 changes: 5 additions & 6 deletions lib/19/rexml/doctype.rb
Expand Up @@ -115,7 +115,6 @@ def write( output, indent=0, transitive=false, ie_hack=false )
output << " #{@long_name.inspect}" if @long_name
output << " #{@uri.inspect}" if @uri
unless @children.empty?
next_indent = indent + 1
output << ' ['
@children.each { |child|
output << "\n"
Expand Down Expand Up @@ -249,11 +248,11 @@ def initialize name, middle, pub, sys
end

def to_s
"<!NOTATION #@name #@middle#{
@public ? ' ' + public.inspect : ''
}#{
@system ? ' ' +@system.inspect : ''
}>"
notation = "<!NOTATION #{@name} #{@middle}"
notation << " #{@public.inspect}" if @public
notation << " #{@system.inspect}" if @system
notation << ">"
notation
end

def write( output, indent=-1 )
Expand Down
7 changes: 4 additions & 3 deletions lib/19/rexml/document.rb
Expand Up @@ -131,7 +131,8 @@ def version
xml_decl().version
end

# @return the XMLDecl encoding of this document as a String naming the
# encoding (e.g. 'UTF-8'), not an Encoding object.
# If no XMLDecl has been set, returns the default encoding.
def encoding
xml_decl().encoding
Expand Down Expand Up @@ -164,7 +165,7 @@ def stand_alone?
# Document.new("<a><b/></a>").serialize( tr )
#
# output::
# output an object which supports '<< string'; this is where the
# output an object which supports '<< string'; this is where the
# document will be written.
# indent::
# An integer. If -1, no indenting will be used; otherwise, the
Expand All @@ -183,7 +184,7 @@ def stand_alone?
# that IE's limited abilities can handle. This hack inserts a space
# before the /> on empty tags. Defaults to false
def write( output=$stdout, indent=-1, transitive=false, ie_hack=false )
if xml_decl.encoding != "UTF-8" && !output.kind_of?(Output)
if xml_decl.encoding != 'UTF-8' && !output.kind_of?(Output)
output = Output.new( output, xml_decl.encoding )
end
formatter = if indent > -1
Expand Down
37 changes: 18 additions & 19 deletions lib/19/rexml/element.rb
Expand Up @@ -20,7 +20,7 @@ module REXML
class Element < Parent
include Namespace

UNDEFINED = "UNDEFINED"; # The default name
UNDEFINED = "UNDEFINED"; # The default name

# Mechanisms for accessing attributes and child elements of this
# element.
Expand All @@ -31,17 +31,17 @@ class Element < Parent

# Constructor
# arg::
# if not supplied, will be set to the default value.
# If a String, the name of this object will be set to the argument.
# If an Element, the object will be shallowly cloned; name,
# attributes, and namespaces will be copied. Children will +not+ be
# copied.
# if not supplied, will be set to the default value.
# If a String, the name of this object will be set to the argument.
# If an Element, the object will be shallowly cloned; name,
# attributes, and namespaces will be copied. Children will +not+ be
# copied.
# parent::
# if supplied, must be a Parent, and will be used as
# the parent of this object.
# if supplied, must be a Parent, and will be used as
# the parent of this object.
# context::
# If supplied, must be a hash containing context items. Context items
# include:
# If supplied, must be a hash containing context items. Context items
# include:
# * <tt>:respect_whitespace</tt> the value of this is :+all+ or an array of
# strings being the names of the elements to respect
# whitespace for. Defaults to :+all+.
Expand Down Expand Up @@ -297,7 +297,7 @@ def add_element element, attrs=nil
el = @elements.add(element)
attrs.each do |key, value|
el.attributes[key]=value
end if attrs.kind_of? Hash
end if attrs.kind_of? Hash
el
end

Expand Down Expand Up @@ -667,7 +667,7 @@ def texts
#
# Writes out this element, and recursively, all children.
# output::
# output an object which supports '<< string'; this is where the
# output an object which supports '<< string'; this is where the
# document will be written.
# indent::
# An integer. If -1, no indenting will be used; otherwise, the
Expand Down Expand Up @@ -778,7 +778,7 @@ def []( index, name=nil)
else
return XPath::first( @element, index )
#{ |element|
# return element if element.kind_of? Element
# return element if element.kind_of? Element
#}
#return nil
end
Expand Down Expand Up @@ -874,7 +874,6 @@ def delete_all( xpath )
# a.elements.add(Element.new('b')) #-> <a><b/></a>
# a.elements.add('c') #-> <a><b/><c/></a>
def add element=nil
rv = nil
if element.nil?
Element.new("", self, @element.context)
elsif not element.kind_of?(Element)
Expand Down Expand Up @@ -1087,12 +1086,12 @@ def get_attribute( name )
# doc.root.attributes['foo'] = '4'
# doc.root.attributes['x:foo'] = nil
def []=( name, value )
if value.nil? # Delete the named attribute
if value.nil? # Delete the named attribute
attr = get_attribute(name)
delete attr
return
end
element_document = @element.document

unless value.kind_of? Attribute
if @element.document and @element.document.doctype
value = Text::normalize( value, @element.document.doctype )
Expand All @@ -1117,8 +1116,8 @@ def []=( name, value )
value.prefix != "xmlns" and old_attr.prefix != "xmlns" and
@element.namespace( old_attr.prefix ) ==
@element.namespace( value.prefix )
store value.name, { old_attr.prefix => old_attr,
value.prefix => value }
store value.name, { old_attr.prefix => old_attr,
value.prefix => value }
else
store value.name, value
end
Expand Down Expand Up @@ -1197,7 +1196,7 @@ def delete( attribute )
return @element
else # the supplied attribute is a top-level one
attr = old
res = super(name)
super(name)
end
@element
end
Expand Down
107 changes: 49 additions & 58 deletions lib/19/rexml/encoding.rb
@@ -1,71 +1,62 @@
# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
module REXML
  # Mixin that tracks the name of the character encoding used by the
  # including object and converts strings to and from that encoding with
  # String#encode.
  module Encoding
    # ID ---> Encoding name
    attr_reader :encoding

    # Sets the encoding name.
    #
    # encoding::
    #   An ::Encoding object, an encoding name String, or nil. nil selects
    #   'UTF-8'.
    #
    # Returns false when the resolved name equals the current one, true
    # otherwise. Raises ArgumentError when a String name is not recognized
    # by find_encoding.
    def encoding=(encoding)
      # Must be ::Encoding: inside this module a bare +Encoding+ resolves to
      # REXML::Encoding, so core Encoding objects would never match.
      encoding = encoding.name if encoding.is_a?(::Encoding)
      if encoding.is_a?(String)
        original_encoding = encoding
        encoding = find_encoding(encoding)
        unless encoding
          raise ArgumentError, "Bad encoding name #{original_encoding}"
        end
      end
      return false if defined?(@encoding) && encoding == @encoding
      @encoding = encoding ? encoding.upcase : 'UTF-8'
      true
    end

    # Detects the encoding of the XML text +xml+.
    #
    # A leading UTF-16 byte order mark is removed from +xml+ in place and
    # reported as 'UTF-16BE' or 'UTF-16LE'. Otherwise the upper-cased
    # encoding named in the XML declaration is returned, or 'UTF-8' when
    # there is none.
    def check_encoding(xml)
      # We have to recognize UTF-16BE, UTF-16LE, and UTF-8.
      # Compare raw bytes: comparing against a "\xfe\xff" literal depends on
      # how both strings are encoding-tagged and misses binary input.
      bom = xml[0, 2].bytes.to_a
      if bom == [0xfe, 0xff]
        xml[0, 2] = ""
        return 'UTF-16BE'
      elsif bom == [0xff, 0xfe]
        xml[0, 2] = ""
        return 'UTF-16LE'
      end
      xml =~ /^\s*<\?xml\s+version\s*=\s*(['"]).*?\1\s+encoding\s*=\s*(["'])(.*?)\2/m
      $3 ? $3.upcase : 'UTF-8'
    end

    # Transcodes +string+ into the current encoding (@encoding).
    def encode(string)
      string.encode(@encoding)
    end

    # Transcodes +string+ to UTF-8, interpreting its bytes as being in the
    # current encoding (@encoding).
    def decode(string)
      string.encode(::Encoding::UTF_8, @encoding)
    end

    private

    # Normalizes an encoding name.
    #
    # Returns the (possibly rewritten) name, or nil when no conversion path
    # from it to UTF-8 exists.
    def find_encoding(name)
      case name
      when /\Ashift-jis\z/i
        return "SHIFT_JIS"
      when /\ACP-(\d+)\z/
        name = "CP#{$1}"
      when /\AUTF-8\z/i
        return name
      end
      begin
        # Called only for its exception: an unknown name raises.
        ::Encoding::Converter.search_convpath(name, 'UTF-8')
      rescue ::Encoding::ConverterNotFoundError
        return nil
      end
      name
    end
  end
end

0 comments on commit 0672a14

Please sign in to comment.