Skip to content

Commit

Permalink
[Xml] Add in a strict flag to the parser, on by default. If the flag …
Browse files Browse the repository at this point in the history
…is off, the parser attempts to work around some errors, such as tags that are not properly closed (common in HTML docs). This allows us to parse XML docs with unbalanced tags.
  • Loading branch information
Whiteknight committed Jun 28, 2012
1 parent 4897e48 commit d02a703
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 9 deletions.
4 changes: 2 additions & 2 deletions src/unstable/xml/Document.winxed
Original file line number Diff line number Diff line change
Expand Up @@ -80,9 +80,9 @@ class Rosella.Xml.Document
}

// Read the xml from string and parse it into this document
function read_from_string(string xml, int do_validate = false)
function read_from_string(string xml, int do_validate = false, int strict = true)
{
Rosella.Xml.parse(xml, self, do_validate);
Rosella.Xml.parse(xml, self, do_validate, strict);
}

// Validate this document against its DTD
Expand Down
27 changes: 24 additions & 3 deletions src/unstable/xml/Parser.winxed
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
namespace Rosella.Xml.Parser
{
function __parse_xml(string xml, var s, var b, int len, var document)
function __parse_xml(string xml, var s, var b, int len, var document, int strict)
{
var tag_stack = [];
var current_tag = new Rosella.Xml.Tag.DocumentRoot();
Expand Down Expand Up @@ -32,8 +32,29 @@ namespace Rosella.Xml.Parser
if (is_close) {
if (tag.xmlns != current_tag.xmlns)
Rosella.Error.error("Syntax error, mismatched tags. Tag namespaces and/or names do not match at position %d", current_position(len, s, b));
if (tag.name != current_tag.name)
Rosella.Error.error("Syntax error, mismatched tags. Found closing tag '%s' at position %d but current tag is '%s'", tag.name, current_position(len, s, b), current_tag.name);
if (tag.name != current_tag.name) {
if (strict)
Rosella.Error.error("Syntax error, mismatched tags. Found closing tag '%s' at position %d but current tag is '%s'", tag.name, current_position(len, s, b), current_tag.name);
else {
/* If we're not in strict mode, search up the tag stack for
the matching open tag, dropping any unmatched tags that are
popped from the stack along the way. */
current_tag = null;
while(elements(tag_stack) > 0) {
var dead_tag = tag_stack.pop();
if (dead_tag instanceof Rosella.Xml.Tag.DocumentRoot) {
current_tag = dead_tag;
break;
}
if (dead_tag.name == tag.name) {
dead_tag.set_end(tag);
current_tag = tag_stack.pop();
break;
}
}
continue;
}
}
current_tag.set_end(tag);
current_tag = tag_stack.pop();
continue;
Expand Down
8 changes: 4 additions & 4 deletions src/unstable/xml/Xml.winxed
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,10 @@ namespace Rosella.Xml
}

// Read XML from a string. Return an Xml.Document object
function read_string(string xml)
function read_string(string xml, int strict = true)
{
var document = new Rosella.Xml.Document();
document.read_from_string(xml, false);
document.read_from_string(xml, false, strict);
return document;
}

Expand All @@ -45,12 +45,12 @@ namespace Rosella.Xml

// Parse the XML string into the given document. Optionally perform
// verification on the generated structure
function parse(string xml, var document, int do_verify = true)
function parse(string xml, var document, int do_verify = true, int strict = true)
{
if (document == null)
document = new Rosella.Xml.Document();
:(xml, var s, var b, int len) = Rosella.Parse.setup_parse(xml);
Rosella.Xml.Parser.__parse_xml(xml, s, b, len, document);
Rosella.Xml.Parser.__parse_xml(xml, s, b, len, document, strict);
if (do_verify)
document.validate();
return document;
Expand Down

0 comments on commit d02a703

Please sign in to comment.