Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse files

Fix for parsing broken processing instructions.

Some HTML contains broken processing instructions, such as this one
encountered in the wild: `<?xml:namespace prefix = o ns = "urn:schemas-microsoft-com:office:office" />`.

The parser crashes on this since it only accepts `?>` (or end-of-file) as
the end of a processing instruction. This patch fixes it by also
allowing `>` or `/>` to end a processing instruction.
  • Loading branch information...
commit 9e45e2ebf93caba04d8b09272ced7678dc9561a3 1 parent 3bea608
@jkoops jkoops authored etrepum committed
Showing with 14 additions and 0 deletions.
  1. +14 −0 src/mochiweb_html.erl
View
14 src/mochiweb_html.erl
@@ -576,6 +576,10 @@ find_qgt(Bin, S=#decoder{offset=O}) ->
case Bin of
<<_:O/binary, "?>", _/binary>> ->
?ADV_COL(S, 2);
+ <<_:O/binary, ">", _/binary>> ->
+ ?ADV_COL(S, 1);
+ <<_:O/binary, "/>", _/binary>> ->
+ ?ADV_COL(S, 2);
%% tokenize_attributes takes care of this state:
%% <<_:O/binary, C, _/binary>> ->
%% find_qgt(Bin, ?INC_CHAR(S, C));
@@ -1236,5 +1240,15 @@ parse_missing_attr_name_test() ->
{<<"html">>, [ { <<"=">>, <<"=">> }, { <<"black">>, <<"black">> } ], [] },
mochiweb_html:parse(D0)),
ok.
+
+parse_broken_pi_test() -> % Regression test: a processing instruction closed by /> instead of ?> must still parse.
+ D0 = <<"<html><?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" /></html>">>,
+ ?assertEqual(
+ {<<"html">>, [], [ % Expected tree: html element, no attributes, one pi child.
+ { pi, <<"xml:namespace">>, [ { <<"prefix">>, <<"o">> }, % prefix and ns come back as the pi's attribute pairs.
+ { <<"ns">>, <<"urn:schemas-microsoft-com:office:office">> } ] }
+ ] },
+ mochiweb_html:parse(D0)),
+ ok.
-endif.
Please sign in to comment.
Something went wrong with that request. Please try again.