Skip to content

Commit

Permalink
[Xml] More bounds-checking during xml parse to avoid certain common e…
Browse files Browse the repository at this point in the history
…rrors.

Fix .to_xml to use indents and newlines to be slightly prettier.
  • Loading branch information
Whiteknight committed Mar 30, 2012
1 parent fc262e2 commit 55bd732
Show file tree
Hide file tree
Showing 5 changed files with 53 additions and 22 deletions.
7 changes: 4 additions & 3 deletions src/unstable/xml/Document.winxed
Expand Up @@ -40,12 +40,13 @@ class Rosella.Xml.Document

function read_from_file(string filename)
{
string xml = (new Rosella.FileSystem.File(filename)).read_all_text();
Rosella.Xml.__parse(xml, self);
var f = new Rosella.FileSystem.File(filename);
string xml = f.read_all_text();
Rosella.Xml.parse(xml, self);
}

function read_from_string(string xml)
{
Rosella.Xml._parse(xml, self);
Rosella.Xml.parse(xml, self);
}
}
20 changes: 20 additions & 0 deletions src/unstable/xml/Includes.winxed
@@ -1,3 +1,5 @@
const string INDENT_STR = " ";

inline is_whitespace(int c) return int
{
return (c <= 0x20 || c >= 127); // Any non-printable characters and all whitespace
Expand All @@ -12,13 +14,31 @@ inline is_alphanumeric(int c) return int
c == 0x5F || c == 0x2D); // c == '_' || c == '-'
}

// Raise an error if the value `c`, read from `buffer` at index `j`, is zero
// and `j` lies at or beyond the end of the buffer.
//   c      - the value that was read from buffer[j]
//   buffer - the buffer being parsed; only its element count is inspected
//   j      - the index that was read
// The length is only looked up when c is zero. NOTE(review): this presumably
// relies on out-of-range reads yielding 0 -- confirm against the buffer type.
inline error_on_out_of_bounds(int c, var buffer, int j)
{
    if (c == 0) {
        int length = elements(buffer);
        if (length <= j)
            Rosella.Error.error("Attempt to read position %d, passed the end of the buffer %d", j, length);
    }
}

// Fetch the value stored at index `j` of `buffer` and return it, after
// passing it through error_on_out_of_bounds so that a zero value read at or
// beyond the end of the buffer is reported as an error.
inline read_char(var buffer, int j) return int
{
    int ch = buffer[j];
    error_on_out_of_bounds(ch, buffer, j);
    return ch;
}

// Advance `j` past any run of whitespace in `buffer` and return the index of
// the first non-whitespace value.
//   buffer - the buffer being parsed
//   j      - the index at which to start skipping
// Returns the index of the first value for which is_whitespace() is false.
// Reports an error (through error_on_out_of_bounds) when the whitespace run
// extends to the end of the buffer.
inline eat_whitespace(var buffer, int j) return int
{
    // is_whitespace() accepts every value <= 0x20, including 0. Without an
    // explicit length bound the loop can never stop on a 0 value, so it would
    // keep scanning beyond the buffer and the check below could never fire.
    int buflen = elements(buffer);
    int c = buffer[j];
    while (j < buflen && is_whitespace(c)) {
        j++;
        c = buffer[j];
    }
    // NOTE(review): when j == buflen this relies on the out-of-range read
    // above yielding 0, the same assumption error_on_out_of_bounds makes.
    error_on_out_of_bounds(c, buffer, j);
    return j;
}

25 changes: 15 additions & 10 deletions src/unstable/xml/Tag.winxed
Expand Up @@ -37,23 +37,26 @@ class Rosella.Xml.Tag
function to_xml()
{
var sb = new 'StringBuilder';
self.to_xml_sb(sb);
self.to_xml_sb(sb, 0);
return sb;
}

function to_xml_sb(var sb)
function to_xml_sb(var sb, int indent)
{
string indent_str = "\n" + repeat_string(INDENT_STR, indent);
push(sb, indent_str);
push(sb, "<");
push(sb, self.name);
self.__to_xml_sb_attrs(sb);
self.__to_xml_sb_attrs(sb, indent);
push(sb, ">");
self.__to_xml_sb_children(sb);
self.__to_xml_sb_children(sb, indent + 1);
push(sb, indent_str);
push(sb, "</");
push(sb, self.name);
push(sb, ">");
}

function __to_xml_sb_attrs(var sb)
function __to_xml_sb_attrs(var sb, int indent)
{
for (string attr_name in self.attributes) {
string attr = self.attributes[attr_name];
Expand All @@ -65,18 +68,18 @@ class Rosella.Xml.Tag
}
}

function __to_xml_sb_children(var sb)
function __to_xml_sb_children(var sb, int indent)
{
for (var child in self.children)
child.to_xml_sb(sb);
child.to_xml_sb(sb, indent);
}
}

class Rosella.Xml.Tag.XmlHeader : Rosella.Xml.Tag
{
function XmlHeader() { self.Tag(""); }

function to_xml_sb(var sb)
function to_xml_sb(var sb, int indent)
{
push(sb, "<?");
push(sb, self.name);
Expand All @@ -97,12 +100,14 @@ class Rosella.Xml.Tag.Comment
function to_xml()
{
var sb = new 'StringBuilder';
self.to_xml_sb(sb);
self.to_xml_sb(sb, 0);
return sb;
}

function to_xml_sb(var sb)
function to_xml_sb(var sb, int indent)
{
string indent_str = "\n" + repeat_string(INDENT_STR, indent);
push(sb, indent_str);
push(sb, "<!--");
push(sb, self.text);
push(sb, "-->");
Expand Down
6 changes: 4 additions & 2 deletions src/unstable/xml/Text.winxed
Expand Up @@ -11,12 +11,14 @@ class Rosella.Xml.Text
function to_xml()
{
var sb = new 'StringBuilder';
self.to_xml_sb(sb);
self.to_xml_sb(sb, 0);
return sb;
}

function to_xml_sb(var sb)
function to_xml_sb(var sb, int indent)
{
string indent_str = "\n" + repeat_string(INDENT_STR, indent);
push(sb, indent_str);
push(sb, self.text);
}
}
17 changes: 10 additions & 7 deletions src/unstable/xml/Xml.winxed
Expand Up @@ -13,7 +13,7 @@ namespace Rosella.Xml
}
}

class Rosella.Xml
namespace Rosella.Xml
{
function read_file(string filename)
{
Expand All @@ -29,7 +29,7 @@ class Rosella.Xml
return document;
}

function __parse(string xml, var document)
function parse(string xml, var document)
{
int open_bracket = get_codepoint("<", 0);
int question_mark = get_codepoint("?", 0);
Expand All @@ -43,12 +43,13 @@ class Rosella.Xml
int i = eat_whitespace(buffer, 0);
int c = buffer[i];
if (buf_length > 5 && c == open_bracket && int(buffer[i + 1]) == question_mark)
i = self.__parse_xml_header(buffer, i + 2, document);
i = Rosella.Xml.__parse_xml_header(buffer, i + 2, document);

error_on_out_of_bounds(c, buffer, i);
for ( ; i < elements(buffer); ) {
int c = buffer[i];
if (c == open_bracket) {
:(var tag, i, int is_close, int contained, int is_comment) = self.__parse_tag(buffer, i + 1, current_tag);
:(var tag, i, int is_close, int contained, int is_comment) = Rosella.Xml.__parse_tag(buffer, i + 1, current_tag);

// A closing tag: </foo>
if (!is_comment && !contained && is_close) {
Expand Down Expand Up @@ -98,7 +99,7 @@ class Rosella.Xml
int close_bracket = get_codepoint(">", 0);
var header_tag = new Rosella.Xml.Tag.XmlHeader();
i = eat_whitespace(buffer, i);
i = self.__parse_attributes(buffer, i, header_tag);
i = Rosella.Xml.__parse_attributes(buffer, i, header_tag);
i = eat_whitespace(buffer, i);
int c = buffer[i];
if (c == question_mark && int(buffer[i + 1]) == close_bracket) {
Expand All @@ -119,7 +120,7 @@ class Rosella.Xml
int j = i;
int c = buffer[j];
if (c == exclamation_point)
return self.__parse_comment(buffer, i + 1);
return Rosella.Xml.__parse_comment(buffer, i + 1);
int is_close_tag = false;

if (c == close_slash) {
Expand All @@ -135,7 +136,7 @@ class Rosella.Xml

j = eat_whitespace(buffer, j);
if (!is_close_tag)
j = self.__parse_attributes(buffer, j, tag);
j = Rosella.Xml.__parse_attributes(buffer, j, tag);

c = buffer[j];
if (c == close_bracket)
Expand Down Expand Up @@ -178,6 +179,7 @@ class Rosella.Xml
return comment, j + 3, true, true, true;
}
}
Rosella.Error.error("Unterminated comment starting at position %d", i - 2);
}

function __parse_attributes(var buffer, int i, var tag)
Expand Down Expand Up @@ -228,6 +230,7 @@ class Rosella.Xml
}
c = buffer[j];
}
error_on_out_of_bounds(c, buffer, i);
return j;
}
}
Expand Down

0 comments on commit 55bd732

Please sign in to comment.