Skip to content

Commit

Permalink
XML tokenizer: fix reading of numeric references
Browse files Browse the repository at this point in the history
Fixes #65.
  • Loading branch information
aantron committed Jun 9, 2021
1 parent 420aaa3 commit ddb2d51
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 2 deletions.
5 changes: 4 additions & 1 deletion src/xml_tokenizer.ml
Expand Up @@ -97,7 +97,10 @@ let tokenize report resolve_reference (input, get_location) =
(Printf.sprintf "&#%s%s;" reference_prefix s, "reference",
"number out of range")) !throw unresolved

- | Some n -> k (char n)
+ | Some n ->
+ let utf_8_encoded = Buffer.create 8 in
+ add_utf_8 utf_8_encoded n;
+ k (Buffer.contents utf_8_encoded)
end

| _, c when filter c ->
Expand Down
6 changes: 5 additions & 1 deletion test/test_xml_tokenizer.ml
Expand Up @@ -311,7 +311,11 @@ let tests = [
("xml.tokenizer.reference" >:: fun _ ->
expect "foo&lt;bar&gt;&amp;&quot;&apos;baz&#48;&#x31;quux"
[ 1, 1, S (`Chars ["foo<bar>&\"'baz01quux"]);
- 1, 50, S `EOF]);
+ 1, 50, S `EOF];
+
+ expect "&#955;"
+ [ 1, 1, S (`Chars ["λ"]);
+ 1, 7, S `EOF]);

("xml.tokenizer.bad-reference" >:: fun _ ->
expect "&"
Expand Down

0 comments on commit ddb2d51

Please sign in to comment.