-
-
Notifications
You must be signed in to change notification settings - Fork 1.6k
/
reader.cr
209 lines (182 loc) · 6.37 KB
/
reader.cr
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
require "./libxml2"
require "./parser_options"
# `XML::Reader` is a parser for XML that iterates over an XML document.
#
# ```
# require "xml"
#
# reader = XML::Reader.new(<<-XML)
# <message>Hello XML!</message>
# XML
# reader.read
# reader.name # => "message"
# reader.read
# reader.value # => "Hello XML!"
# ```
#
# This is an alternative approach to `XML.parse` which parses an entire document
# into an XML data structure.
# `XML::Reader` offers more control and does not need to store the XML document
# in memory entirely. The latter is especially useful for large documents with
# the `IO`-based constructor.
#
# WARNING: This type is not concurrency-safe.
class XML::Reader
  # Returns the errors reported while parsing.
  #
  # The array is handed to `Error.collect` by every method that goes through
  # `collect_errors`: `#read`, `#next`, `#next_sibling`, `#read_inner_xml`,
  # `#read_outer_xml` and `#expand?`.
  getter errors = [] of XML::Error

  # Creates a new reader from a string.
  #
  # See `XML::ParserOptions.default` for default options.
  #
  # A reader-level error handler is installed on the new reader. Every message
  # delivered to that handler is raised as an `Error` built from the message
  # (with its trailing newline removed) and the line number read from the
  # locator.
  def initialize(str : String, options : XML::ParserOptions = XML::ParserOptions.default)
    @reader = LibXML.xmlReaderForMemory(str, str.bytesize, nil, nil, options)
    LibXML.xmlTextReaderSetErrorHandler @reader, ->(arg, msg, severity, locator) do
      msg_str = String.new(msg).chomp
      line_number = LibXML.xmlTextReaderLocatorLineNumber(locator)
      raise Error.new(msg_str, line_number)
    end
  end

  # Creates a new reader from an IO.
  #
  # See `XML::ParserOptions.default` for default options.
  #
  # The *io* is boxed and passed to libxml2 as the callback context. The first
  # callback unboxes it and reads into the buffer supplied by libxml2, returning
  # the number of bytes read; the second callback unboxes it, closes it and
  # returns `0`.
  def initialize(io : IO, options : XML::ParserOptions = XML::ParserOptions.default)
    @reader = LibXML.xmlReaderForIO(
      ->(context, buffer, length) { Box(IO).unbox(context).read(Slice.new(buffer, length)).to_i },
      ->(context) { Box(IO).unbox(context).close; 0 },
      Box(IO).box(io),
      nil,
      nil,
      options
    )
  end

  # Moves the reader to the next node.
  #
  # Returns `true` only when libxml2 answers `1`; any other result is `false`.
  # Errors reported during the call are collected into `#errors`.
  def read : Bool
    collect_errors { LibXML.xmlTextReaderRead(@reader) == 1 }
  end

  # Moves the reader to the next node while skipping subtrees.
  #
  # Returns `true` only when libxml2 answers `1`; any other result is `false`.
  # Like `#read`, the call runs inside `collect_errors` so that errors raised
  # while skipping over a subtree end up in `#errors` as well.
  def next : Bool
    collect_errors { LibXML.xmlTextReaderNext(@reader) == 1 }
  end

  # Moves the reader to the next sibling node while skipping subtrees.
  #
  # Returns `true` if the reader was moved, `false` otherwise.
  def next_sibling : Bool
    result = LibXML.xmlTextReaderNextSibling(@reader)
    # Work around libxml2 with incomplete xmlTextReaderNextSibling()
    # see: https://gitlab.gnome.org/GNOME/libxml2/issues/7
    if result == -1
      node = LibXML.xmlTextReaderCurrentNode(@reader)
      if node.null?
        # No current node yet: behave like `#read`.
        collect_errors { LibXML.xmlTextReaderRead(@reader) == 1 }
      elsif !node.value.next.null?
        # The current node has a following node: behave like `#next`,
        # including error collection.
        collect_errors { LibXML.xmlTextReaderNext(@reader) == 1 }
      else
        false
      end
    else
      result == 1
    end
  end

  # Returns the `XML::Reader::Type` of the node.
  def node_type : XML::Reader::Type
    LibXML.xmlTextReaderNodeType(@reader)
  end

  # Returns the name of the node.
  #
  # Returns an empty string when libxml2 provides no name.
  def name : String
    value = LibXML.xmlTextReaderConstName(@reader)
    value ? String.new(value) : ""
  end

  # Checks if the node is an empty element.
  def empty_element? : Bool
    LibXML.xmlTextReaderIsEmptyElement(@reader) == 1
  end

  # Checks if the node has any attributes.
  def has_attributes? : Bool
    LibXML.xmlTextReaderHasAttributes(@reader) == 1
  end

  # Returns attribute count of the node.
  def attributes_count : Int32
    LibXML.xmlTextReaderAttributeCount(@reader)
  end

  # Moves to the first `XML::Reader::Type::ATTRIBUTE` of the node.
  #
  # Returns `true` only when libxml2 answers `1`.
  def move_to_first_attribute : Bool
    LibXML.xmlTextReaderMoveToFirstAttribute(@reader) == 1
  end

  # Moves to the next `XML::Reader::Type::ATTRIBUTE` of the node.
  #
  # Returns `true` only when libxml2 answers `1`.
  def move_to_next_attribute : Bool
    LibXML.xmlTextReaderMoveToNextAttribute(@reader) == 1
  end

  # Moves to the `XML::Reader::Type::ATTRIBUTE` with the specified name.
  #
  # Returns `true` only when libxml2 answers `1`.
  # Raises `XML::Error` if *name* contains a null byte.
  def move_to_attribute(name : String) : Bool
    check_no_null_byte(name)
    LibXML.xmlTextReaderMoveToAttribute(@reader, name) == 1
  end

  # Gets the attribute content for the *attribute* given by name.
  # Raises `KeyError` if attribute is not found.
  # Raises `XML::Error` if *attribute* contains a null byte.
  def [](attribute : String) : String
    self[attribute]? || raise(KeyError.new("Missing attribute: #{attribute}"))
  end

  # Gets the attribute content for the *attribute* given by name.
  # Returns `nil` if attribute is not found.
  # Raises `XML::Error` if *attribute* contains a null byte.
  def []?(attribute : String) : String?
    check_no_null_byte(attribute)
    value = LibXML.xmlTextReaderGetAttribute(@reader, attribute)
    String.new(value) if value
  end

  # Moves from the `XML::Reader::Type::ATTRIBUTE` to its containing `XML::Reader::Type::ELEMENT`.
  #
  # Returns `true` only when libxml2 answers `1`.
  def move_to_element : Bool
    LibXML.xmlTextReaderMoveToElement(@reader) == 1
  end

  # Returns the current nesting depth of the reader.
  def depth : Int32
    LibXML.xmlTextReaderDepth(@reader)
  end

  # Returns the node's XML content including subtrees.
  #
  # Returns an empty string when libxml2 provides no content.
  # Errors reported during the call are collected into `#errors`.
  def read_inner_xml : String
    xml = collect_errors { LibXML.xmlTextReaderReadInnerXml(@reader) }
    xml ? String.new(xml) : ""
  end

  # Returns the XML for the node and its content including subtrees.
  #
  # Returns an empty string when the reader is on a `NONE` node or when
  # libxml2 provides no content.
  # Errors reported during the call are collected into `#errors`.
  def read_outer_xml : String
    # On a NONE type libxml2 2.9.9 is giving a segfault:
    #
    # https://gitlab.gnome.org/GNOME/libxml2/issues/43
    #
    # so we avoid the issue by returning early here.
    #
    # FIXME: if that issue is fixed we should revert this line
    # to avoid doing an extra C call each time.
    return "" if node_type.none?

    xml = collect_errors { LibXML.xmlTextReaderReadOuterXml(@reader) }
    xml ? String.new(xml) : ""
  end

  # Expands the node to a `XML::Node` that can be searched with XPath etc.
  # The returned `XML::Node` is only valid until the next call to `#read`.
  #
  # Raises a `XML::Error` if the node could not be expanded.
  def expand : XML::Node
    node = expand?
    return node if node

    # libxml2 may fail to expand without having recorded an error, in which
    # case the last-error pointer is null and must not be handed to the
    # pointer-based `XML::Error` constructor.
    last_error = LibXML.xmlGetLastError
    if last_error
      raise XML::Error.new(last_error)
    else
      raise XML::Error.new("Could not expand node", 0)
    end
  end

  # Expands the node to a `XML::Node` that can be searched with XPath etc.
  # The returned `XML::Node` is only valid until the next call to `#read`.
  #
  # Returns `nil` if the node could not be expanded.
  # Errors reported during the call are collected into `#errors`, matching
  # `#read_inner_xml` and `#read_outer_xml`.
  def expand? : XML::Node?
    xml = collect_errors { LibXML.xmlTextReaderExpand(@reader) }
    XML::Node.new(xml) if xml
  end

  # Returns the text content of the node.
  #
  # Returns an empty string when libxml2 provides no value.
  def value : String
    value = LibXML.xmlTextReaderConstValue(@reader)
    value ? String.new(value) : ""
  end

  # Returns a reference to the underlying `LibXML::XMLTextReader`.
  def to_unsafe
    @reader
  end

  # Runs the block through `Error.collect` with this reader's `#errors` array
  # and returns the block's value.
  private def collect_errors(&)
    Error.collect(@errors) { yield }
  end

  # Raises `XML::Error` if *attribute* contains a null byte; such a name cannot
  # be passed to libxml2 as a C string without being cut short.
  private def check_no_null_byte(attribute)
    if attribute.byte_index(0)
      raise XML::Error.new("Invalid attribute name: #{attribute.inspect} (contains null character)", 0)
    end
  end
end