Skip to content

Commit

Permalink
Fix RT #44715: pass the recover flag correctly to the HTML parser
Browse files Browse the repository at this point in the history
  • Loading branch information
pajas committed Sep 25, 2009
1 parent aa2ff7b commit 0c7d04b
Show file tree
Hide file tree
Showing 2 changed files with 42 additions and 7 deletions.
15 changes: 9 additions & 6 deletions LibXML.xs
Expand Up @@ -2013,9 +2013,10 @@ _parse_html_string(self, string, svURL, svEncoding, options = 0)
if (encoding == NULL && SvUTF8( string )) {
encoding = "UTF-8";
}
recover = LibXML_get_recover(real_obj);
if (options & HTML_PARSE_RECOVER) {
recover = ((options & HTML_PARSE_NOERROR) ? 2 : 1);
}
#if LIBXML_VERSION >= 20627
if (recover) options |= HTML_PARSE_RECOVER;
real_doc = htmlReadDoc((xmlChar*)ptr, URL, encoding, options);
#else
real_doc = htmlParseDoc((xmlChar*)ptr, encoding);
Expand Down Expand Up @@ -2073,9 +2074,10 @@ _parse_html_file(self, filename_sv, svURL, svEncoding, options = 0)
RETVAL = &PL_sv_undef;
INIT_ERROR_HANDLER;
real_obj = LibXML_init_parser(self,NULL);
recover = LibXML_get_recover(real_obj);
if (options & HTML_PARSE_RECOVER) {
recover = ((options & HTML_PARSE_NOERROR) ? 2 : 1);
}
#if LIBXML_VERSION >= 20627
if (recover) options |= HTML_PARSE_RECOVER;
real_doc = htmlReadFile((const char *)filename,
encoding,
options);
Expand Down Expand Up @@ -2131,9 +2133,10 @@ _parse_html_fh(self, fh, svURL, svEncoding, options = 0)
RETVAL = &PL_sv_undef;
INIT_ERROR_HANDLER;
real_obj = LibXML_init_parser(self,NULL);
recover = LibXML_get_recover(real_obj);
if (options & HTML_PARSE_RECOVER) {
recover = ((options & HTML_PARSE_NOERROR) ? 2 : 1);
}
#if LIBXML_VERSION >= 20627
if (recover) options |= HTML_PARSE_RECOVER;

real_doc = htmlReadIO((xmlInputReadCallback) LibXML_read_perl,
NULL,
Expand Down
34 changes: 33 additions & 1 deletion t/12html.t
@@ -1,5 +1,5 @@
use Test;
BEGIN { plan tests => 38 }
BEGIN { plan tests => 41 }
use XML::LibXML;
use IO::File;
ok(1);
Expand Down Expand Up @@ -209,3 +209,35 @@ print "parse example/enc2_latin2.html...\n";
ok($htmldoc->findvalue('//p/text()'), $utf_str);
}
}


{
    # Regression test for RT #44715: the recover flag supplied in the
    # per-call options hash must be honoured by parse_html_string().
    # The sample markup contains bare '&' characters inside an href value
    # (presumably what makes a non-recovering parse fail -- confirm against
    # the ticket).

    my $html = <<'EOF';
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<title>Test &amp; Test some more</title>
</head>
<body>
<p>Meet you at the caf&eacute;?</p>
<p>How about <a href="http://example.com?mode=cafe&id=1&ref=foo">this one</a>?
</p>
<input class="wibble" id="foo" value="working" />
</body>
</html>
EOF
    my $parser = XML::LibXML->new;

    # Keep $doc lexical: if the parse dies inside eval, a package-global
    # $doc left over from earlier in the script must not make the
    # following checks pass by accident.
    my $doc;
    eval {
        $doc = $parser->parse_html_string(
            $html => { recover => 1, suppress_errors => 1 }
        );
    };

    # Test 1: parsing must not throw when recovery is requested.
    ok(!$@);

    # Test 2: a document object must be returned.
    ok($doc);

    # Test 3: content after the questionable markup is still reachable.
    my $root = $doc && $doc->documentElement;
    my $val = $root && $root->findvalue('//input[@id="foo"]/@value');

    # Two-argument ok() reports got/expected on failure and copes with an
    # undefined $val without an "uninitialized" warning from 'eq'.
    ok($val, 'working');
}

0 comments on commit 0c7d04b

Please sign in to comment.