Permalink
Browse files

Add support for UTF8 (simple round-trip, no parsing)

  • Loading branch information...
1 parent 90639ad commit 38ffa22c865ad2c5922eeff6a3a583b5b73998f0 Tom McNulty committed Feb 27, 2010
Showing with 14 additions and 3 deletions.
  1. +1 −0 .gitignore
  2. +10 −3 src/markdown.erl
  3. +3 −0 src/markdown_tests.erl
View
@@ -0,0 +1 @@
+*.beam
View
@@ -10,6 +10,7 @@
-module(markdown).
-export([conv/1,
+ conv_utf8/1,
conv_file/2]).
-import(lists, [flatten/1, reverse/1]).
@@ -48,6 +49,12 @@ conv(String) -> Lex = lex(String),
% io:format("TypedLines are ~p~nRefs is ~p~n",
% [TypedLines, Refs]),
parse(TypedLines, Refs).
+
+%% Markdown conversion for UTF-8 encoded input.
+%% The input is decoded with xmerl_ucs:from_utf8/1, handed to conv/1, and
+%% the result of conv/1 is re-encoded with xmerl_ucs:to_utf8/1.
+-spec conv_utf8(list()) -> list().
+conv_utf8(Utf8) ->
+    xmerl_ucs:to_utf8(conv(xmerl_ucs:from_utf8(Utf8))).
conv_file(FileIn, FileOut) ->
case file:open(FileIn, [read]) of
@@ -760,9 +767,9 @@ make_tag_str({{{tag, Type}, Tag}, _}) ->
%% Returns String with every non-breaking space replaced by a plain space.
esc_tag(String) ->
    esc_t1(String, []).
-esc_t1([], Acc) -> lists:reverse(Acc);
-esc_t1([160 | T], Acc) -> esc_t1(T, [32 | Acc]); % non-breaking space to space
-esc_t1([H | T], Acc) -> esc_t1(T, [H | Acc]).
+%% Worker for esc_tag/1: yields the reversed accumulator followed by the
+%% characters of Chars, with each non-breaking space swapped for a space.
+esc_t1(Chars, Acc) ->
+    Escaped = [case C of
+                   ?NBSP -> ?SPACE; % non-breaking space to space
+                   _     -> C
+               end || C <- Chars],
+    lists:reverse(Acc, Escaped).
%% if it is a list we need to discard the initial white space...
make_list_str([{{ws, _}, _} | T] = List) ->
@@ -367,3 +367,6 @@ unit_test_() ->
?_assert(conv("[Inline link 1 with parens](/url\\(test\\) \"title\").\n\n[Inline link 2 with parens](</url\\(test\\)> \"title\").\n\n[Inline link 3 with non-escaped parens](/url(test) \"title\").\n\n[Inline link 4 with non-escaped parens](</url(test)> \"title\").\n\n[Reference link 1 with parens][1].\n\n[Reference link 2 with parens][2].\n\n [1]: /url(test) \"title\"\n [2]: </url(test)> \"title\"\n") == "<p><a href=\"/url(test)\" title=\"title\">Inline link 1 with parens</a>.</p>\n\n<p><a href=\"/url\\(test\\\">Inline link 2 with parens</a>> \"title\").</p>\n\n<p><a href=\"/url(test\">Inline link 3 with non-escaped parens</a> \"title\").</p>\n\n<p><a href=\"/url(test\">Inline link 4 with non-escaped parens</a>> \"title\").</p>\n\n<p><a href=\"/url\" title=\"test) &quot;title\">Reference link 1 with parens</a>.</p>\n\n<p><a href=\"/url\" title=\"test)> &quot;title\">Reference link 2 with parens</a>.</p>"),
?_assert(conv("Paragraph and no space:\r* ciao\r\rParagraph and 1 space:\r * ciao\r\rParagraph and 3 spaces:\r * ciao\r\rParagraph and 4 spaces:\r * ciao\r\rParagraph before header:\r#Header\r\rParagraph before blockquote:\r>Some quote.\r") == "<p>Paragraph and no space:\n* ciao</p>\n\n<p>Paragraph and 1 space:\n * ciao</p>\n\n<p>Paragraph and 3 spaces:\n * ciao</p>\n\n<p>Paragraph and 4 spaces:\n * ciao</p>\n\n<p>Paragraph before header:</p>\n\n<h1>Header</h1>\n\n<p>Paragraph before blockquote:</p>\n\n<blockquote>\n <p>Some quote.</p>\n</blockquote>")
].
+
+%% Round-trips a UTF-8 encoded dagger (U+2020) through conv_utf8/1.
+%%
+%% This is a plain EUnit test function (name ends in _test, not _test_), so
+%% the assertion must be the eager ?assertEqual; a list of lazy
+%% ?_assertEqual objects returned from here would never be evaluated.
+%%
+%% The character is spelled out as its three UTF-8 bytes so the test does not
+%% depend on the encoding the compiler assumes for this source file.
+%% NOTE(review): the input literal is reconstructed from the expected value
+%% "<p>†</p>" -- confirm against the upstream commit.
+%% The final byte (16#A0 = 160) equals the non-breaking-space code that
+%% esc_t1 rewrites to a space; presumably that is why this character was
+%% picked -- verify.
+utf8_test() ->
+    Dagger = [16#E2, 16#80, 16#A0],
+    ?assertEqual("<p>" ++ Dagger ++ "</p>", conv_utf8(Dagger)).

0 comments on commit 38ffa22

Please sign in to comment.