Skip to content
Browse files

Merge branch 'html5-gh64'

  • Loading branch information...
2 parents 57f6d12 + f425e5e commit e6d1870200802f32e17e334272191397e15aec53 @etrepum etrepum committed Oct 14, 2011
Showing with 2,204 additions and 273 deletions.
  1. +9 −0 CHANGES.md
  2. +45 −0 scripts/entities.erl
  3. +1 −1 src/mochiweb.app.src
  4. +2,130 −255 src/mochiweb_charref.erl
  5. +19 −17 src/mochiweb_html.erl
View
9 CHANGES.md
@@ -1,3 +1,12 @@
+Version 2.3.0 released 2011-10-14
+
+* Handle ssl_closed message in mochiweb_http (#59)
+* Added support for new MIME types (otf, eot, m4v, svg, svgz, ttc, ttf,
+ vcf, webm, webp, woff) (#61)
+* Updated mochiweb_charref to support all HTML5 entities. Note that
+ if you are using this module directly, the spec has changed to return
+ `[integer()]` for some entities. (#64)
+
Version 2.2.1 released 2011-08-31
* Removed `mochiweb_skel` module from the pre-rebar era
View
45 scripts/entities.erl
@@ -0,0 +1,45 @@
+#!/usr/bin/env escript
+%% -*- mode: erlang -*-
+-export([main/1]).
+
+%% @doc Script used to generate mochiweb_charref.erl table.
+
+%% Entry point of the escript; the argument list is ignored.
+%% Fetches the W3C named-character-references page, parses it with
+%% mochiweb_html, collects the entity rows via search/1, sorts them and
+%% prints the generated entity/1 clauses via print/1.
+main(_) ->
+ %% The result of starting inets is not checked.
+ application:start(inets),
+ %% Adds "ebin" to the front of the code path -- presumably so that
+ %% mochiweb_html can be loaded when run from the project root (TODO confirm).
+ code:add_patha("ebin"),
+ %% Assertive match: anything other than {ok, {_, _, Body}} raises badmatch.
+ {ok, {_, _, HTML}} = httpc:request("http://www.w3.org/TR/html5/named-character-references.html"),
+ print(lists:sort(search(mochiweb_html:parse(HTML)))).
+
+%% Writes the generated clauses with io:put_chars/1.
+%% For each element of the list, emits the text built by clause/1 followed
+%% by ";\n". Once the list is exhausted, emits the final catch-all clause
+%% "entity(_) -> undefined." and returns ok.
+print([F | T]) ->
+ io:put_chars([clause(F), ";\n"]),
+ print(T);
+print([]) ->
+ io:put_chars(["entity(_) -> undefined.\n"]),
+ ok.
+
+%% Builds the iolist for one generated clause from {Title, Codepoints}.
+%% With exactly one code point the clause body is a bare `16#HEX' integer;
+%% with two or more it is a list `[16#HEX, 16#HEX, ...]'. The code point
+%% text is inserted as given, directly after the `16#' prefix.
+%% No clause terminator is added here; print/1 appends it.
+%% An empty code point list matches neither clause (function_clause).
+clause({Title, [Codepoint]}) ->
+ ["entity(\"", Title, "\") -> 16#", Codepoint];
+clause({Title, [First | Rest]}) ->
+ ["entity(\"", Title, "\") -> [16#", First,
+ [[", 16#", Codepoint] || Codepoint <- Rest],
+ "]"].
+
+
+%% Collects {Title, Codepoints} pairs from a parsed HTML tree by calling
+%% search/2 with an empty accumulator. The result order is whatever
+%% search/2 accumulates; the caller in main/1 sorts it.
+search(Elem) ->
+ search(Elem, []).
+
+%% Walks a parsed HTML node, prepending one {Title, Codepoints} tuple to Acc
+%% for every matching entity row.
+%%
+%% Clause 1: a `tr' element whose first attribute is an `id' starting with
+%% "entity-". Its first child must be a `td' holding a single `code'
+%% element with one text child, and its second child a `td' with no
+%% attributes and one text child; any other layout raises badmatch.
+%% The row's children are not searched further.
+%% Clause 2: any other 3-tuple with remaining children -- search the first
+%% child, then continue with the rest, threading the accumulator.
+%% Clause 3: a 3-tuple with no children left -- return Acc.
+%% Clause 4: a binary (text) node -- ignored.
+%% A term matching none of these shapes raises function_clause.
+search({<<"tr">>, [{<<"id">>, <<"entity-", _/binary>>} | _], Children}, Acc) ->
+ %% HTML5 charrefs can have more than one code point(!)
+ [{<<"td">>, _, [{<<"code">>, _, [TitleSemi]}]},
+ {<<"td">>, [], [RawCPs]} | _] = Children,
+ %% Strip the trailing ";" from the entity name; a name without it
+ %% fails the match below.
+ L = byte_size(TitleSemi) - 1,
+ <<Title:L/binary, $;>> = TitleSemi,
+ %% With `global' and `{capture, all, binary}' each match is
+ %% [WholeMatch, HexDigits]; only the hex digits after "U+" are kept.
+ %% A cell with no "U+XXXX" makes re:run/3 return nomatch (badmatch).
+ {match, Matches} = re:run(RawCPs, "(?:\\s*U\\+)([a-fA-F0-9]+)",
+ [{capture, all, binary}, global]),
+ [{Title, [CP || [_, CP] <- Matches]} | Acc];
+search({Tag, Attrs, [H | T]}, Acc) ->
+ search({Tag, Attrs, T}, search(H, Acc));
+search({_Tag, _Attrs, []}, Acc) ->
+ Acc;
+search(<<_/binary>>, Acc) ->
+ Acc.
View
2 src/mochiweb.app.src
@@ -1,7 +1,7 @@
%% This is generated from src/mochiweb.app.src
{application, mochiweb,
[{description, "MochiMedia Web Server"},
- {vsn, "2.2.1"},
+ {vsn, "2.3.0"},
{modules, []},
{registered, []},
{env, []},
View
2,385 src/mochiweb_charref.erl
2,130 additions, 255 deletions not shown because the diff is too large. Please use a local Git client to view these changes.
View
36 src/mochiweb_html.erl
@@ -480,7 +480,7 @@ tokenize_attr_value(Attr, B, S) ->
_ ->
{Attr, S1}
end.
-
+
tokenize_quoted_or_unquoted_attr_value(B, S=#decoder{offset=O}) ->
case B of
<<_:O/binary>> ->
@@ -491,7 +491,7 @@ tokenize_quoted_or_unquoted_attr_value(B, S=#decoder{offset=O}) ->
<<_:O/binary, _/binary>> ->
tokenize_unquoted_attr_value(B, S, [])
end.
-
+
tokenize_quoted_attr_value(B, S=#decoder{offset=O}, Acc, Q) ->
case B of
<<_:O/binary>> ->
@@ -506,7 +506,7 @@ tokenize_quoted_attr_value(B, S=#decoder{offset=O}, Acc, Q) ->
<<_:O/binary, C, _/binary>> ->
tokenize_quoted_attr_value(B, ?INC_COL(S), [C|Acc], Q)
end.
-
+
tokenize_unquoted_attr_value(B, S=#decoder{offset=O}, Acc) ->
case B of
<<_:O/binary>> ->
@@ -520,7 +520,7 @@ tokenize_unquoted_attr_value(B, S=#decoder{offset=O}, Acc) ->
{ iolist_to_binary(lists:reverse(Acc)), S };
<<_:O/binary, C, _/binary>> ->
tokenize_unquoted_attr_value(B, ?INC_COL(S), [C|Acc])
- end.
+ end.
skip_whitespace(B, S=#decoder{offset=O}) ->
case B of
@@ -627,8 +627,10 @@ tokenize_charref(Bin, S=#decoder{offset=O}, Start) ->
Len1 = Len + 2,
<<_:Start1/binary, R:Len1/binary, _/binary>> = Bin,
R;
- Unichar ->
- mochiutf8:codepoint_to_bytes(Unichar)
+ Unichar when is_integer(Unichar) ->
+ mochiutf8:codepoint_to_bytes(Unichar);
+ Unichars when is_list(Unichars) ->
+ unicode:characters_to_binary(Unichars)
end,
{{data, Data, false}, ?INC_COL(S)};
_ ->
@@ -1195,43 +1197,43 @@ parse_unquoted_attr_test() ->
{ <<"img">>, [ { <<"src">>, <<"/images/icon.png">> } ], [] }
]},
mochiweb_html:parse(D0)),
-
+
D1 = <<"<html><img src=/images/icon.png></img></html>">>,
?assertEqual(
{<<"html">>,[],[
{ <<"img">>, [ { <<"src">>, <<"/images/icon.png">> } ], [] }
]},
mochiweb_html:parse(D1)),
-
+
D2 = <<"<html><img src=/images/icon&gt;.png width=100></img></html>">>,
?assertEqual(
{<<"html">>,[],[
{ <<"img">>, [ { <<"src">>, <<"/images/icon>.png">> }, { <<"width">>, <<"100">> } ], [] }
]},
mochiweb_html:parse(D2)),
- ok.
-
-parse_quoted_attr_test() ->
+ ok.
+
+parse_quoted_attr_test() ->
D0 = <<"<html><img src='/images/icon.png'></html>">>,
?assertEqual(
{<<"html">>,[],[
{ <<"img">>, [ { <<"src">>, <<"/images/icon.png">> } ], [] }
]},
- mochiweb_html:parse(D0)),
-
+ mochiweb_html:parse(D0)),
+
D1 = <<"<html><img src=\"/images/icon.png'></html>">>,
?assertEqual(
{<<"html">>,[],[
{ <<"img">>, [ { <<"src">>, <<"/images/icon.png'></html>">> } ], [] }
]},
- mochiweb_html:parse(D1)),
+ mochiweb_html:parse(D1)),
D2 = <<"<html><img src=\"/images/icon&gt;.png\"></html>">>,
?assertEqual(
{<<"html">>,[],[
{ <<"img">>, [ { <<"src">>, <<"/images/icon>.png">> } ], [] }
]},
- mochiweb_html:parse(D2)),
+ mochiweb_html:parse(D2)),
ok.
parse_missing_attr_name_test() ->
@@ -1245,7 +1247,7 @@ parse_broken_pi_test() ->
D0 = <<"<html><?xml:namespace prefix = o ns = \"urn:schemas-microsoft-com:office:office\" /></html>">>,
?assertEqual(
{<<"html">>, [], [
- { pi, <<"xml:namespace">>, [ { <<"prefix">>, <<"o">> },
+ { pi, <<"xml:namespace">>, [ { <<"prefix">>, <<"o">> },
{ <<"ns">>, <<"urn:schemas-microsoft-com:office:office">> } ] }
] },
mochiweb_html:parse(D0)),
@@ -1260,5 +1262,5 @@ parse_funny_singletons_test() ->
] },
mochiweb_html:parse(D0)),
ok.
-
+
-endif.

0 comments on commit e6d1870

Please sign in to comment.
Something went wrong with that request. Please try again.