Skip to content

Commit

Permalink
Merge e7f0702 into 4310fbb
Browse files: browse the repository at this point in the history
  • Loading branch information
sagotch committed Apr 8, 2019
2 parents 4310fbb + e7f0702 commit e1c5f78
Show file tree
Hide file tree
Showing 3 changed files with 44 additions and 1 deletion.
2 changes: 1 addition & 1 deletion src/html_tokenizer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -381,7 +381,7 @@ let tokenize report (input, get_location) =
match_named (Some (w, m)) (v::(replace @ matched)) [] trie text
| Trie.Yes m ->
let w = Buffer.contents text in
finish (Some (w, m)) (v::matched) [])
finish (Some (w, m)) (v::(replace @ matched)) [])
in
match_named
None [] [] (Lazy.force named_entity_trie) (Buffer.create 16))
Expand Down
37 changes: 37 additions & 0 deletions test/test_html_parser.ml
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,43 @@ let tests = [
1, 27, S `End_element;
1, 27, S `End_element]);

(* Parser test for anchor tags whose [href] value contains an ampersand.
   Each [expect] call pairs an input document with the list of signals the
   parser is expected to produce. Every entry appears to be
   (line, column, signal) -- NOTE(review): confirm against the definition of
   [expect]. In all three cases the expected stream contains html, head and
   body start elements at 1, 1 even though the input has no such tags. *)
("html.parser.links" >:: fun _ ->
(* Case 1: "&acte=123" inside the attribute value. The expected [href] is the
   input text unchanged. *)
expect
{|<a href="foo.com?bar=on&acte=123">foo</a>|}
[ 1, 1, S (start_element "html");
1, 1, S (start_element "head");
1, 1, S `End_element;
1, 1, S (start_element "body");
1, 1, S (`Start_element ((html_ns, "a"), [(("", "href"), "foo.com?bar=on&acte=123")]));
1, 35, S (`Text ["foo"]);
1, 38, S `End_element;
1, 42, S `End_element;
1, 42, S `End_element];

(* Case 2: "&image=on" -- an entity name with no terminating ';', followed by
   '='. The expected [href] again keeps the text verbatim, i.e. no character
   of the attribute value is dropped or replaced.
   NOTE(review): presumably this is the regression case for unterminated
   references in attribute values -- confirm against the tokenizer. *)
expect
{|<a href="foo.com?bar=on&image=on">foo</a>|}
[ 1, 1, S (start_element "html");
1, 1, S (start_element "head");
1, 1, S `End_element;
1, 1, S (start_element "body");
1, 1, S (`Start_element ((html_ns, "a"), [(("", "href"), "foo.com?bar=on&image=on")]));
1, 35, S (`Text ["foo"]);
1, 38, S `End_element;
1, 42, S `End_element;
1, 42, S `End_element];

(* Case 3: "&image;" -- the same name, now terminated by ';'. Here the
   expected [href] contains the single character "ℑ" in place of the
   reference. The input is two characters shorter than in the previous cases,
   so the text and end-element columns are 33/36/40 instead of 35/38/42. *)
expect
{|<a href="foo.com?bar=on&image;">foo</a>|}
[ 1, 1, S (start_element "html");
1, 1, S (start_element "head");
1, 1, S `End_element;
1, 1, S (start_element "body");
1, 1, S (`Start_element ((html_ns, "a"), [(("", "href"), "foo.com?bar=onℑ")]));
1, 33, S (`Text ["foo"]);
1, 36, S `End_element;
1, 40, S `End_element;
1, 40, S `End_element]);

("html.parser.headings" >:: fun _ ->
expect "<p><h1><h2>foo</h2>"
[ 1, 1, S (start_element "html");
Expand Down
6 changes: 6 additions & 0 deletions test/test_html_tokenizer.ml
Original file line number Diff line number Diff line change
Expand Up @@ -884,6 +884,12 @@ let tests = [
1, 1, S (`Start (tag "foo" ["bar", "&lt="]));
1, 17, S `EOF];

expect "<foo bar='&image='>"
[ 1, 11, E (`Bad_token ("&image=", "attribute",
"unterminated entity reference followed by '='"));
1, 1, S (`Start (tag "foo" ["bar", "&image="]));
1, 20, S `EOF];

expect "<foo bar=&amp;>"
[ 1, 1, S (`Start (tag "foo" ["bar", "&"]));
1, 16, S `EOF];
Expand Down

0 comments on commit e1c5f78

Please sign in to comment.