Skip to content

Commit

Permalink
decode_entities confused by trailing incomplete entity
Browse files Browse the repository at this point in the history
Mark Martinec reported a crash when running SpamAssassin, given a
particular HTML junk mail to parse.  The problem was caused by
HTML::Parser's decode_entities function confusing itself when it
encountered strings with incomplete entities at the end of the string.
  • Loading branch information
gisle committed Oct 22, 2009
1 parent 6e91cf4 commit b9aae1e
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 7 deletions.
4 changes: 3 additions & 1 deletion t/entities.t
@@ -1,6 +1,6 @@
use HTML::Entities qw(decode_entities encode_entities encode_entities_numeric);

use Test::More tests => 16;
use Test::More tests => 17;

$a = "Våre norske tegn bør &#230res";

Expand Down Expand Up @@ -71,6 +71,8 @@ is(decode_entities("abc&def&ghi&abc;&def;"), "abc&def&ghi&abc;&def;");
is(decode_entities("'"), "'");
is(encode_entities("'", "'"), "'");
is(decode_entities("Attention Home&#959&#969n&#1257rs...1&#1109t T&#1110&#1084e E&#957&#1257&#1075"),
"Attention Home\x{3BF}\x{3C9}n\x{4E9}rs...1\x{455}t T\x{456}\x{43C}e E\x{3BD}\x{4E9}\x{433}");
__END__
# Quoted from rfc1866.txt
Expand Down
12 changes: 6 additions & 6 deletions util.c
Expand Up @@ -94,14 +94,14 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
ent_start = s;
repl = 0;

if (*s == '#') {
if (s < end && *s == '#') {
UV num = 0;
UV prev = 0;
int ok = 0;
s++;
if (*s == 'x' || *s == 'X') {
if (s < end && (*s == 'x' || *s == 'X')) {
s++;
while (*s) {
while (s < end) {
char *tmp = strchr(PL_hexdigit, *s);
if (!tmp)
break;
Expand All @@ -117,7 +117,7 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
}
}
else {
while (isDIGIT(*s)) {
while (s < end && isDIGIT(*s)) {
num = num * 10 + (*s - '0');
if (prev && num < prev) {
/* overflow */
Expand Down Expand Up @@ -180,7 +180,7 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)
}
else {
char *ent_name = s;
while (isALNUM(*s))
while (s < end && isALNUM(*s))
s++;
if (ent_name != s && entity2char) {
SV** svp;
Expand Down Expand Up @@ -216,7 +216,7 @@ decode_entities(pTHX_ SV* sv, HV* entity2char, bool expand_prefix)

if (repl) {
char *repl_allocated = 0;
if (*s == ';')
if (s < end && *s == ';')
s++;
t--; /* '&' already copied, undo it */

Expand Down

0 comments on commit b9aae1e

Please sign in to comment.