Skip to content

Commit

Permalink
Fixed SAX parsing of XML attributes.
Browse files Browse the repository at this point in the history
This was utterly broken, mainly due to me overlooking it. There are now 2 new
callbacks to handle this properly:

* on_attribute: to handle a single attribute/value pair
* on_attributes: to handle a collection of attributes (as returned by
  on_attribute)

By default on_attribut returns a Hash, on_attributes in turn merges all
attribute hashes into a single one. This ensures that on_element _actually_
receives the attributes as a Hash, instead of an Array with random
nil/XML::Attribute values.
  • Loading branch information
Yorick Peterse committed Mar 21, 2015
1 parent 605d565 commit d8b9725
Show file tree
Hide file tree
Showing 3 changed files with 163 additions and 43 deletions.
33 changes: 20 additions & 13 deletions lib/oga/xml/parser.rll
Original file line number Diff line number Diff line change
Expand Up @@ -154,15 +154,12 @@ element

# Attributes

attributes
= attributes_ { val[0] }
| _
;
attributes = attributes_ { on_attributes(val[0]) };

attributes_
= attribute attributes
= attribute attributes_
{
val[1].unshift(val[0])
val[1].unshift(val[0]) if val[0]
val[1]
}
| _
Expand All @@ -171,9 +168,7 @@ attributes_
attribute
= attribute_name attribute_follow
{
val[0].value = val[1]

val[0]
on_attribute(val[0][1], val[0][0], val[1])
}
;

Expand All @@ -183,8 +178,8 @@ attribute_follow
;

attribute_name
= T_ATTR { on_attribute(val[0]) }
| T_ATTR_NS T_ATTR { on_attribute(val[1], val[0]) }
= T_ATTR { [nil, val[0]] }
| T_ATTR_NS T_ATTR
;

# XML declarations
Expand Down Expand Up @@ -388,9 +383,21 @@ string_body_follow
##
# @param [String] name
# @param [String] ns_name
# @param [String] value
# @return [Oga::XML::Attribute]
#
def on_attribute(name, ns_name = nil)
return Attribute.new(:namespace_name => ns_name, :name => name)
def on_attribute(name, ns_name = nil, value = nil)
return Attribute.new(
:namespace_name => ns_name,
:name => name,
:value => value
)
end

##
# @param [Array] attrs
#
def on_attributes(attrs)
return attrs
end
}
51 changes: 50 additions & 1 deletion lib/oga/xml/sax_parser.rb
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ module XML
# * `on_text`
# * `on_element`
# * `on_element_children`
# * `on_attribute`
# * `on_attributes`
# * `after_element`
#
# For example:
Expand Down Expand Up @@ -57,6 +59,14 @@ module XML
# end
# end
#
# ## Attributes
#
# Attributes returned by `on_attribute` are passed as an Hash as the 3rd
# argument of the `on_element` callback. The keys of this Hash are the
# attribute names (optionally prefixed by their namespace) and their values.
# You can overwrite `on_attribute` to control individual attributes and
# `on_attributes` to control the final set.
#
class SaxParser < Parser
##
# @param [Object] handler The SAX handler to delegate callbacks to.
Expand Down Expand Up @@ -86,7 +96,7 @@ def #{method}(*args)
# @see [Oga::XML::Parser#on_element]
# @return [Array]
#
def on_element(namespace, name, attrs = {})
def on_element(namespace, name, attrs = [])
run_callback(:on_element, namespace, name, attrs)

return namespace, name
Expand All @@ -101,6 +111,45 @@ def on_element(namespace, name, attrs = {})
#
def after_element(namespace_with_name)
run_callback(:after_element, *namespace_with_name)

return
end

##
# Manually overwrite this method since for this one we _do_ want the
# return value so it can be passed to `on_element`.
#
# @see [Oga::XML::Parser#on_attribute]
#
def on_attribute(name, ns = nil, value = nil)
if @handler.respond_to?(:on_attribute)
return run_callback(:on_attribute, name, ns, value)
end

key = ns ? "#{ns}:#{name}" : name

return {key => value}
end

##
# Merges the attributes together into a Hash.
#
# @param [Array] attrs
# @return [Hash]
#
def on_attributes(attrs)
if @handler.respond_to?(:on_attributes)
return run_callback(:on_attributes, attrs)
end

merged = {}

attrs.each do |pair|
# Hash#merge requires an extra allocation, this doesn't.
pair.each { |key, value| merged[key] = value }
end

return merged
end

private
Expand Down
122 changes: 93 additions & 29 deletions spec/oga/xml/sax_parser_spec.rb
Original file line number Diff line number Diff line change
@@ -1,50 +1,114 @@
require 'spec_helper'

describe Oga::XML::SaxParser do
before do
@handler = Class.new do
attr_reader :name, :after_namespace, :after_name
describe '#parse' do
before do
@handler = Class.new do
attr_reader :name, :attrs, :after_namespace, :after_name

def on_element(namespace, name, attrs = {})
@name = name
end
def on_element(namespace, name, attrs = {})
@name = name
@attrs = attrs
end

def after_element(namespace, name)
@after_namespace = namespace
@after_name = name
def after_element(namespace, name)
@after_namespace = namespace
@after_name = name
end
end
end
end

it 'ignores return values of callback methods' do
parser = described_class.new(@handler.new, 'foo')
it 'ignores return values of callback methods' do
parser = described_class.new(@handler.new, 'foo')

parser.parse.should be_nil
end
parser.parse.should be_nil
end

it 'uses custom callback methods if defined' do
handler = @handler.new
parser = described_class.new(handler, '<foo />')

parser.parse

handler.name.should == 'foo'
end

it 'always passes element names to after_element' do
handler = @handler.new
parser = described_class.new(handler, '<namespace:foo />')

parser.parse

handler.after_name.should == 'foo'
handler.after_namespace.should == 'namespace'
end

it 'ignores callbacks that are not defined in the handler' do
parser = described_class.new(@handler.new, '<!--foo-->')

# This would raise if undefined callbacks were _not_ ignored.
lambda { parser.parse }.should_not raise_error
end

it 'uses custom callback methods if defined' do
handler = @handler.new
parser = described_class.new(handler, '<foo />')
it 'passes the attributes to the on_element callback' do
handler = @handler.new
parser = described_class.new(handler, '<a b="10" x:c="20" />')

parser.parse
parser.parse

handler.name.should == 'foo'
handler.attrs.should == {'b' => '10', 'x:c' => '20'}
end
end

it 'always passes element names to after_element' do
handler = @handler.new
parser = described_class.new(handler, '<namespace:foo />')
describe '#on_attribute' do
before do
@handler_without = Class.new.new

@handler_with = Class.new do
def on_attribute(name, ns = nil, value = nil)
return {name.upcase => value}
end
end.new
end

parser.parse
it 'returns a default Hash if no custom callback exists' do
parser = described_class.new(@handler_without, '<a x:foo="bar" />')
hash = parser.on_attribute('foo', 'x', 'bar')

handler.after_name.should == 'foo'
handler.after_namespace.should == 'namespace'
hash.should == {'x:foo' => 'bar'}
end

it 'returns the return value of a custom callback' do
parser = described_class.new(@handler_with, nil)
hash = parser.on_attribute('foo', 'x', 'bar')

hash.should == {'FOO' => 'bar'}
end
end

it 'ignores callbacks that are not defined in the handler' do
parser = described_class.new(@handler.new, '<!--foo-->')
describe '#on_attributes' do
before do
@handler_without = Class.new.new

# This would raise if undefined callbacks were _not_ ignored.
lambda { parser.parse }.should_not raise_error
@handler_with = Class.new do
def on_attributes(attrs)
return %w{Alice Bob} # these two again
end
end.new
end

it 'merges all attributes into a Hash if no callback is defined' do
parser = described_class.new(@handler_without, nil)
hash = parser.on_attributes([{'a' => 'b'}, {'c' => 'd'}])

hash.should == {'a' => 'b', 'c' => 'd'}
end

it 'returns the return value of a custom callback' do
parser = described_class.new(@handler_with, nil)
retval = parser.on_attributes([{'a' => 'b'}, {'c' => 'd'}])

retval.should == %w{Alice Bob}
end
end
end

0 comments on commit d8b9725

Please sign in to comment.