Permalink
Browse files

HTML/BBCode fixes

  • Loading branch information...
1 parent 41ee5fa commit b559560dc10af95a2e8faa0f508afd08690a20f6 Evan Miller committed Jun 27, 2011
Showing with 39 additions and 25 deletions.
  1. +1 −1 README.md
  2. +2 −0 src/jerome.erl
  3. +14 −11 src/jerome_bbcode_parser.yrl
  4. +10 −7 src/jerome_html_parser.yrl
  5. +12 −6 src/jerome_html_scanner.erl
View
@@ -17,6 +17,6 @@ Available formats:
Format Read/Write?
------ -----------
BBCode Read + Write
- HTML Write-only
+ HTML Read + Write
RTF Read + Write
Textile Read + Write
View
@@ -15,6 +15,8 @@ parse(Path, Format, ImageFun) when is_list(Path) ->
parse(Binary, bbcode, ImageFun) when is_binary(Binary) ->
jerome_bbcode_consumer:consume(Binary, ImageFun);
+parse(Binary, html, ImageFun) when is_binary(Binary) ->
+ jerome_html_consumer:consume(Binary, ImageFun);
parse(Binary, rtf, ImageFun) when is_binary(Binary) ->
jerome_rtf_consumer:consume(Binary, ImageFun);
parse(Binary, textile, ImageFun) when is_binary(Binary) ->
@@ -2,6 +2,7 @@
Nonterminals
Elements
+ TaggedElement
Table
TableRows
TableCells
@@ -47,17 +48,19 @@ Elements -> '$empty' : [].
Elements -> Elements Table : '$1' ++ ['$2'].
Elements -> Elements text : '$1' ++ ['$2'].
Elements -> Elements newline : '$1' ++ ['$2'].
-Elements -> open_bold Elements close_bold : {bold, '$2'}.
-Elements -> open_italic Elements close_italic : {italic, '$2'}.
-Elements -> open_underline Elements close_underline : {underline, '$2'}.
-Elements -> open_superscript Elements close_superscript : {superscript, '$2'}.
-Elements -> open_subscript Elements close_subscript : {subscript, '$2'}.
-Elements -> open_url text close_url : {hyperlink, '$2'}.
-Elements -> open_url_equals url_value Elements close_url : {hyperlink, '$2', '$3'}.
-Elements -> open_img text close_img : {image, '$2'}.
-Elements -> open_quote Elements close_quote : {quote, '$2'}.
-Elements -> open_code Elements close_code : {code, '$2'}.
-Elements -> open_list ListItems close_list : {list, '$2'}.
+Elements -> Elements TaggedElement : '$1' ++ ['$2'].
+
+TaggedElement -> open_bold Elements close_bold : {bold, '$2'}.
+TaggedElement -> open_italic Elements close_italic : {italic, '$2'}.
+TaggedElement -> open_underline Elements close_underline : {underline, '$2'}.
+TaggedElement -> open_superscript Elements close_superscript : {superscript, '$2'}.
+TaggedElement -> open_subscript Elements close_subscript : {subscript, '$2'}.
+TaggedElement -> open_url text close_url : {hyperlink, '$2'}.
+TaggedElement -> open_url_equals url_value Elements close_url : {hyperlink, '$2', '$3'}.
+TaggedElement -> open_img text close_img : {image, '$2'}.
+TaggedElement -> open_quote Elements close_quote : {quote, '$2'}.
+TaggedElement -> open_code Elements close_code : {code, '$2'}.
+TaggedElement -> open_list ListItems close_list : {list, '$2'}.
ListItems -> '$empty' : [].
ListItems -> ListItems list_item Elements : '$1' ++ [{list_item, '$3'}].
View
@@ -1,7 +1,8 @@
% HTML parser
Nonterminals
- Elements.
+ Elements
+ TaggedElement.
Terminals
open_bold
@@ -25,9 +26,11 @@ Rootsymbol
Elements -> '$empty' : [].
Elements -> Elements text : '$1' ++ ['$2'].
Elements -> Elements newline : '$1' ++ ['$2'].
-Elements -> open_bold Elements close_bold : {bold, '$2'}.
-Elements -> open_italic Elements close_italic : {italic, '$2'}.
-Elements -> open_underline Elements close_underline : {underline, '$2'}.
-Elements -> open_url Elements close_url : {hyperlink, '$1', '$2'}.
-Elements -> open_superscript Elements close_superscript : {superscript, '$1'}.
-Elements -> open_subscript Elements close_subscript : {subscript, '$1'}.
+Elements -> Elements TaggedElement : '$1' ++ ['$2'].
+
+TaggedElement -> open_bold Elements close_bold : {bold, '$2'}.
+TaggedElement -> open_italic Elements close_italic : {italic, '$2'}.
+TaggedElement -> open_underline Elements close_underline : {underline, '$2'}.
+TaggedElement -> open_url Elements close_url : {hyperlink, '$1', '$2'}.
+TaggedElement -> open_superscript Elements close_superscript : {superscript, '$1'}.
+TaggedElement -> open_subscript Elements close_subscript : {subscript, '$1'}.
@@ -77,11 +77,17 @@ scan([H|T], [{open_url, Pos, Value}|Scanned], {Row, Column}, in_url) ->
scan([$<, $/, A, $> |T], Scanned, {Row, Column} = Pos, text) when A =:= $A; A =:= $a ->
scan(T, [{close_url, Pos}|Scanned], {Row, Column} = Pos, text);
scan("\r\n"++T, [{text, TPos, Text}|Scanned], {Row, _Column}, text) ->
- scan(T, [{text, TPos, lists:reverse("\r\n", Text)}|Scanned], {Row + 1, 0}, text);
-scan("\r\n"++T, Scanned, {Row, _Column} = Pos, text) ->
- scan(T, [{text, Pos, lists:reverse("\r\n")}|Scanned], {Row + 1, 0}, text);
+ scan(T, [{text, TPos, [$\ |Text]}|Scanned], {Row + 1, 0}, text);
+scan("\r\n"++T, [{text, TPos, " "++Text}|Scanned], {Row, _Column}, text) ->
+ scan(T, [{text, TPos, [$\ |Text]}|Scanned], {Row + 1, 0}, text);
+scan("\r\n"++T, Scanned, {Row, _Column}, text) ->
+ scan(T, Scanned, {Row + 1, 0}, text);
scan("\n"++T, [{text, TPos, Text}|Scanned], {Row, _Column}, text) ->
- scan(T, [{text, TPos, [$\n|Text]}|Scanned], {Row + 1, 0}, text);
+ scan(T, [{text, TPos, [$\ |Text]}|Scanned], {Row + 1, 0}, text);
+scan("\n"++T, [{text, TPos, " "++Text}|Scanned], {Row, _Column}, text) ->
+ scan(T, [{text, TPos, [$\ |Text]}|Scanned], {Row + 1, 0}, text);
+scan("\n"++T, Scanned, {Row, _Column}, text) ->
+ scan(T, Scanned, {Row + 1, 0}, text);
scan("<"++T, Scanned, {Row, Column}, text) ->
scan(T, Scanned, {Row, Column + 1}, in_tag);
scan("\""++T, Scanned, {Row, Column}, in_tag) ->
@@ -94,8 +100,8 @@ scan("\'"++T, Scanned, {Row, Column}, in_single_quote) ->
scan(T, Scanned, {Row, Column + 1}, in_tag);
scan(">"++T, Scanned, {Row, Column}, in_tag) ->
scan(T, Scanned, {Row, Column + 1}, text);
-scan([_H|T], Scanned, {Row, Column}, in_tag) ->
- scan(T, Scanned, {Row, Column + 1}, in_tag);
+scan([_H|T], Scanned, {Row, Column}, State) when State =:= in_tag; State =:= in_double_quote; State =:= in_single_quote ->
+ scan(T, Scanned, {Row, Column + 1}, State);
scan("&amp;"++T, Scanned, {Row, Column} = Pos, text) ->
scan(T, append_text(Scanned, Pos, [$&]), {Row, Column + length("&amp;")}, text);
scan("&quot;"++T, Scanned, {Row, Column} = Pos, text) ->

0 comments on commit b559560

Please sign in to comment.