Browse files

Fix shift/reduce conflicts in textile parser

  • Loading branch information...
1 parent b559560 commit 4fadd44fa6f8a29824e613474358c0aeae314321 Evan Miller committed Jun 27, 2011
Showing with 127 additions and 84 deletions.
  1. +2 −0 src/jerome_html_scanner.erl
  2. +27 −18 src/jerome_textile_parser.yrl
  3. +98 −66 src/jerome_textile_scanner.erl
View
2 src/jerome_html_scanner.erl
@@ -64,6 +64,8 @@ scan("<br>"++T, Scanned, {Row, Column} = Pos, text) ->
scan(T, [{newline, Pos}|Scanned], {Row, Column + length("<br>")}, text);
scan("<BR>"++T, Scanned, {Row, Column} = Pos, text) ->
scan(T, [{newline, Pos}|Scanned], {Row, Column + length("<br>")}, text);
+scan("<div>"++T, Scanned, {Row, Column} = Pos, text) ->
+ scan(T, [{newline, Pos}|Scanned], {Row, Column + length("<div>")}, text);
scan("<a href=\""++T, Scanned, {Row, Column} = Pos, text) ->
scan(T, [{open_url, Pos, ""}|Scanned], {Row, Column + length("<a href=\"")}, in_url);
scan("<A HREF=\""++T, Scanned, {Row, Column} = Pos, text) ->
View
45 src/jerome_textile_parser.yrl
@@ -17,26 +17,34 @@ Nonterminals
Terminals
block_tag
punctuation
- single_star
- double_star
- single_underscore
- double_underscore
+ single_star_even
+ single_star_odd
+ double_star_even
+ double_star_odd
+ single_underscore_even
+ single_underscore_odd
+ double_underscore_even
+ double_underscore_odd
newline
text
header_cell_start
cell_delimiter
url
image
double_quote
- caret
- tilde.
+ double_quote_open
+ double_quote_close
+ caret_even
+ caret_odd
+ tilde_even
+ tilde_odd.
Rootsymbol
Elements.
Elements -> '$empty' : [].
Elements -> Elements TableRow : '$1' ++ ['$2'].
-Elements -> Elements NonEmptyTextElements : '$1' ++ '$2'.
+Elements -> Elements TextElement : '$1' ++ ['$2'].
Elements -> Elements newline : '$1' ++ ['$2'].
Elements -> Elements BlockTag : '$1' ++ ['$2'].
@@ -65,20 +73,21 @@ TextElement -> text : '$1'.
TextElement -> punctuation : '$1'.
TextElement -> double_quote : '$1'.
TextElement -> image : '$1'.
-TextElement -> single_star NonEmptyTextElements single_star : {strong, '$2'}.
-TextElement -> double_star NonEmptyTextElements double_star : {bold, '$2'}.
-TextElement -> single_underscore NonEmptyTextElements single_underscore : {em, '$2'}.
-TextElement -> double_underscore NonEmptyTextElements double_underscore : {italic, '$2'}.
-TextElement -> double_quote NonEmptyLinkElements double_quote url : {hyperlink, '$2', '$4'}.
-TextElement -> caret NonEmptyTextElements caret : {superscript, '$2'}.
-TextElement -> tilde NonEmptyTextElements tilde : {subscript, '$2'}.
+TextElement -> single_star_even NonEmptyTextElements single_star_odd : {strong, '$2'}.
+TextElement -> double_star_even NonEmptyTextElements double_star_odd : {bold, '$2'}.
+TextElement -> single_underscore_even NonEmptyTextElements single_underscore_odd : {em, '$2'}.
+TextElement -> double_underscore_even NonEmptyTextElements double_underscore_odd : {italic, '$2'}.
+TextElement -> double_quote_open NonEmptyLinkElements double_quote_close url : {hyperlink, '$2', '$4'}.
+TextElement -> caret_even NonEmptyTextElements caret_odd : {superscript, '$2'}.
+TextElement -> tilde_even NonEmptyTextElements tilde_odd : {subscript, '$2'}.
%% A non-empty sequence of link elements, accumulated left to right as one flat list.
NonEmptyLinkElements -> LinkElement : ['$1'].
%% '$1' is the list built so far and '$2' is the element just parsed, so append it.
%% (Returning ['$1'] would drop '$2' and nest the earlier elements in an extra list.)
NonEmptyLinkElements -> NonEmptyLinkElements LinkElement : '$1' ++ ['$2'].
LinkElement -> text : '$1'.
LinkElement -> punctuation : '$1'.
-LinkElement -> single_star NonEmptyLinkElements single_star : {strong, '$2'}.
-LinkElement -> double_star NonEmptyLinkElements double_star : {bold, '$2'}.
-LinkElement -> single_underscore NonEmptyLinkElements single_underscore : {em, '$2'}.
-LinkElement -> double_underscore NonEmptyLinkElements double_underscore : {italic, '$2'}.
+LinkElement -> single_star_even NonEmptyLinkElements single_star_odd : {strong, '$2'}.
+LinkElement -> double_star_even NonEmptyLinkElements double_star_odd : {bold, '$2'}.
+LinkElement -> single_underscore_even NonEmptyLinkElements single_underscore_odd : {em, '$2'}.
+LinkElement -> double_underscore_even NonEmptyLinkElements double_underscore_odd : {italic, '$2'}.
+
View
164 src/jerome_textile_scanner.erl
@@ -2,9 +2,18 @@
-compile(export_all).
+-record(ctx, {
+ state,
+ single_star_count = 0,
+ double_star_count = 0,
+ single_underscore_count = 0,
+ double_underscore_count = 0,
+ tilde_count = 0,
+ caret_count = 0
+ }).
scan(Textile) ->
- scan(Textile, [], {1, 1}, newline).
+ scan(Textile, [], {1, 1}, #ctx{ state = newline }).
scan([], Scanned, _, _) ->
{ok, lists:reverse(
@@ -19,73 +28,96 @@ scan([], Scanned, _, _) ->
Token
end, Scanned))};
-scan("* "++T, Scanned, {Row, Column} = Pos, newline) ->
- scan(T, [{block_tag, Pos, "*"}|Scanned], {Row, Column + length("* ")}, inline);
-scan("# "++T, Scanned, {Row, Column} = Pos, newline) ->
- scan(T, [{block_tag, Pos, "#"}|Scanned], {Row, Column + length("# ")}, inline);
-scan("**"++T, Scanned, {Row, Column} = Pos, _) ->
- scan(T, [{double_star, Pos}|Scanned], {Row, Column + 2}, inline);
-scan("*"++T, Scanned, {Row, Column} = Pos, _) ->
- scan(T, [{single_star, Pos}|Scanned], {Row, Column + 1}, inline);
-scan("__"++T, Scanned, {Row, Column} = Pos, _) ->
- scan(T, [{double_underscore, Pos}|Scanned], {Row, Column + 2}, inline);
-scan("_"++T, Scanned, {Row, Column} = Pos, _) ->
- scan(T, [{single_underscore, Pos}|Scanned], {Row, Column + 1}, inline);
-scan("~"++T, Scanned, {Row, Column} = Pos, _) ->
- scan(T, [{tilde, Pos}|Scanned], {Row, Column + 1}, inline);
-scan("^"++T, Scanned, {Row, Column} = Pos, _) ->
- scan(T, [{caret, Pos}|Scanned], {Row, Column + 1}, inline);
-scan(" - "++T, Scanned, {Row, Column} = Pos, _) ->
- scan(T, [{punctuation, Pos, " - "}|Scanned], {Row, Column + length(" - ")}, inline);
-scan(" -- "++T, Scanned, {Row, Column} = Pos, _) ->
- scan(T, [{punctuation, Pos, " -- "}|Scanned], {Row, Column + length(" -- ")}, inline);
-scan("(tm)"++T, Scanned, {Row, Column} = Pos, _) ->
- scan(T, [{punctuation, Pos, "(tm)"}|Scanned], {Row, Column + length("(tm)")}, inline);
-scan([$(, C, $)|T], Scanned, {Row, Column} = Pos, _) when C=:=$c; C=:=$r ->
- scan(T, [{punctuation, Pos, [$(, C, $)] }|Scanned], {Row, Column + length("(r)")}, inline);
-scan(" x "++T, Scanned, {Row, Column} = Pos, _) ->
- scan(T, [{punctuation, Pos, " x "}|Scanned], {Row, Column + length(" x ")}, inline);
+scan("* "++T, Scanned, {Row, Column} = Pos, #ctx{ state = newline } = Ctx) ->
+ scan(T, [{block_tag, Pos, "*"}|Scanned], {Row, Column + length("* ")}, Ctx#ctx{ state = inline });
+scan("# "++T, Scanned, {Row, Column} = Pos, #ctx{ state = newline } = Ctx) ->
+ scan(T, [{block_tag, Pos, "#"}|Scanned], {Row, Column + length("# ")}, Ctx#ctx{ state = inline });
+scan("**"++T, Scanned, {Row, Column} = Pos, #ctx{ double_star_count = Count} = Ctx) when Count rem 2 =:= 0 ->
+ scan(T, [{double_star_even, Pos}|Scanned], {Row, Column + 2}, Ctx#ctx{ double_star_count = Count + 1, state = inline });
+scan("**"++T, Scanned, {Row, Column} = Pos, #ctx{ double_star_count = Count} = Ctx) when Count rem 2 =:= 1 ->
+ scan(T, [{double_star_odd, Pos}|Scanned], {Row, Column + 2}, Ctx#ctx{ double_star_count = Count + 1, state = inline });
+scan("*"++T, Scanned, {Row, Column} = Pos, #ctx{ single_star_count = Count} = Ctx) when Count rem 2 =:= 0 ->
+ scan(T, [{single_star_even, Pos}|Scanned], {Row, Column + 1}, Ctx#ctx{ single_star_count = Count + 1, state = inline});
+scan("*"++T, Scanned, {Row, Column} = Pos, #ctx{ single_star_count = Count} = Ctx) when Count rem 2 =:= 1 ->
+ scan(T, [{single_star_odd, Pos}|Scanned], {Row, Column + 1}, Ctx#ctx{ single_star_count = Count + 1, state = inline });
+scan("__"++T, Scanned, {Row, Column} = Pos, #ctx{ double_underscore_count = Count } = Ctx) when Count rem 2 =:= 0 ->
+ scan(T, [{double_underscore_even, Pos}|Scanned], {Row, Column + 2}, Ctx#ctx{ double_underscore_count = Count + 1, state = inline });
+scan("__"++T, Scanned, {Row, Column} = Pos, #ctx{ double_underscore_count = Count } = Ctx) when Count rem 2 =:= 1 ->
+ scan(T, [{double_underscore_odd, Pos}|Scanned], {Row, Column + 2}, Ctx#ctx{ double_underscore_count = Count + 1, state = inline });
+scan("_"++T, Scanned, {Row, Column} = Pos, #ctx{ single_underscore_count = Count } = Ctx) when Count rem 2 =:= 0 ->
+ scan(T, [{single_underscore_even, Pos}|Scanned], {Row, Column + 1}, Ctx#ctx{ single_underscore_count = Count + 1, state = inline });
+scan("_"++T, Scanned, {Row, Column} = Pos, #ctx{ single_underscore_count = Count } = Ctx) when Count rem 2 =:= 1 ->
+ scan(T, [{single_underscore_odd, Pos}|Scanned], {Row, Column + 1}, Ctx#ctx{ single_underscore_count = Count + 1, state = inline });
+scan("~"++T, Scanned, {Row, Column} = Pos, #ctx{ tilde_count = Count } = Ctx) when Count rem 2 =:= 0 ->
+ scan(T, [{tilde_even, Pos}|Scanned], {Row, Column + 1}, Ctx#ctx{ tilde_count = Count + 1, state = inline });
+scan("~"++T, Scanned, {Row, Column} = Pos, #ctx{ tilde_count = Count } = Ctx) when Count rem 2 =:= 1 ->
+ scan(T, [{tilde_odd, Pos}|Scanned], {Row, Column + 1}, Ctx#ctx{ tilde_count = Count + 1, state = inline });
+scan("^"++T, Scanned, {Row, Column} = Pos, #ctx{ caret_count = Count} = Ctx) when Count rem 2 =:= 0 ->
+ scan(T, [{caret_even, Pos}|Scanned], {Row, Column + 1}, Ctx#ctx{ caret_count = Count + 1, state = inline });
+scan("^"++T, Scanned, {Row, Column} = Pos, #ctx{ caret_count = Count} = Ctx) when Count rem 2 =:= 1 ->
+ scan(T, [{caret_odd, Pos}|Scanned], {Row, Column + 1}, Ctx#ctx{ caret_count = Count + 1, state = inline });
+scan(" - "++T, Scanned, {Row, Column} = Pos, Ctx) ->
+ scan(T, [{punctuation, Pos, " - "}|Scanned], {Row, Column + length(" - ")}, Ctx#ctx{ state = inline });
+scan(" -- "++T, Scanned, {Row, Column} = Pos, Ctx) ->
+ scan(T, [{punctuation, Pos, " -- "}|Scanned], {Row, Column + length(" -- ")}, Ctx#ctx{ state = inline });
+scan("(tm)"++T, Scanned, {Row, Column} = Pos, Ctx) ->
+ scan(T, [{punctuation, Pos, "(tm)"}|Scanned], {Row, Column + length("(tm)")}, Ctx#ctx{ state = inline });
+scan([$(, C, $)|T], Scanned, {Row, Column} = Pos, Ctx) when C=:=$c; C=:=$r ->
+ scan(T, [{punctuation, Pos, [$(, C, $)] }|Scanned], {Row, Column + length("(r)")}, Ctx#ctx{ state = inline });
+scan(" x "++T, Scanned, {Row, Column} = Pos, Ctx) ->
+ scan(T, [{punctuation, Pos, " x "}|Scanned], {Row, Column + length(" x ")}, Ctx#ctx{ state = inline });
scan("\r\n"++T, Scanned, {Row, _Column} = Pos, _) ->
- scan(T, [{newline, Pos}|Scanned], {Row + 1, 0}, newline);
+ scan(T, [{newline, Pos}|Scanned], {Row + 1, 0}, #ctx{ state = newline });
scan("\n"++T, Scanned, {Row, _Column} = Pos, _) ->
- scan(T, [{newline, Pos}|Scanned], {Row + 1, 0}, newline);
-scan([$h, D, $., $\ |T], Scanned, {Row, Column} = Pos, newline) when D>=$0, D=<$9 ->
- scan(T, [{block_tag, Pos, [$h, D]}|Scanned], {Row, Column + length("hX. ")}, inline);
-scan([$f, $n, D, $., $\ |T], Scanned, {Row, Column} = Pos, newline) when D>=$0, D=<$9 ->
- scan(T, [{block_tag, Pos, [$f, $n, D]}|Scanned], {Row, Column + length("fnX. ")}, inline);
-scan("bq. "++T, Scanned, {Row, Column} = Pos, newline) ->
- scan(T, [{block_tag, Pos, "bq"}|Scanned], {Row, Column + length("bq. ")}, inline);
-scan("p. "++T, Scanned, {Row, Column} = Pos, newline) ->
- scan(T, [{block_tag, Pos, "p"}|Scanned], {Row, Column + length("p. ")}, inline);
-scan("bc. "++T, Scanned, {Row, Column} = Pos, newline) ->
- scan(T, [{block_tag, Pos, "bc"}|Scanned], {Row, Column + length("bc. ")}, inline);
-scan("pre. "++T, Scanned, {Row, Column} = Pos, newline) ->
- scan(T, [{block_tag, Pos, "pre"}|Scanned], {Row, Column + length("pre. ")}, inline);
+ scan(T, [{newline, Pos}|Scanned], {Row + 1, 0}, #ctx{ state = newline });
+scan([$h, D, $., $\ |T], Scanned, {Row, Column} = Pos, #ctx{ state = newline } = Ctx) when D>=$0, D=<$9 ->
+ scan(T, [{block_tag, Pos, [$h, D]}|Scanned], {Row, Column + length("hX. ")}, Ctx#ctx{ state = inline });
+scan([$f, $n, D, $., $\ |T], Scanned, {Row, Column} = Pos, #ctx{ state = newline } = Ctx) when D>=$0, D=<$9 ->
+ scan(T, [{block_tag, Pos, [$f, $n, D]}|Scanned], {Row, Column + length("fnX. ")}, Ctx#ctx{ state = inline });
+scan("bq. "++T, Scanned, {Row, Column} = Pos, #ctx{ state = newline } = Ctx) ->
+ scan(T, [{block_tag, Pos, "bq"}|Scanned], {Row, Column + length("bq. ")}, Ctx#ctx{ state = inline });
+scan("p. "++T, Scanned, {Row, Column} = Pos, #ctx{ state = newline } = Ctx) ->
+ scan(T, [{block_tag, Pos, "p"}|Scanned], {Row, Column + length("p. ")}, Ctx#ctx{ state = inline });
+scan("bc. "++T, Scanned, {Row, Column} = Pos, #ctx{ state = newline } = Ctx) ->
+ scan(T, [{block_tag, Pos, "bc"}|Scanned], {Row, Column + length("bc. ")}, Ctx#ctx{ state = inline });
+scan("pre. "++T, Scanned, {Row, Column} = Pos, #ctx{ state = newline } = Ctx) ->
+ scan(T, [{block_tag, Pos, "pre"}|Scanned], {Row, Column + length("pre. ")}, Ctx#ctx{ state = inline });
scan("|_. "++T, Scanned, {Row, Column} = Pos, _) ->
- scan(T, [{header_cell_start, Pos}|Scanned], {Row, Column + length("|_. ")}, inline);
+ scan(T, [{header_cell_start, Pos}|Scanned], {Row, Column + length("|_. ")}, #ctx{ state = inline });
scan("|"++T, Scanned, {Row, Column} = Pos, _) ->
- scan(T, [{cell_delimiter, Pos}|Scanned], {Row, Column + 1}, inline);
-scan("\":http://"++T, Scanned, {Row, Column} = Pos, inline) ->
- scan(T, lists:reverse([{double_quote, Pos}, {url, {Row, Column + 2}, lists:reverse("http://")}], Scanned),
- {Row, Column + length("\":http://")}, inlink);
-scan("\""++T, Scanned, {Row, Column} = Pos, inline) ->
- scan(T, [{double_quote, Pos, [$"]}|Scanned], {Row, Column + 1}, inline);
-scan("!http://"++T, Scanned, {Row, Column} = Pos, inline) ->
+ scan(T, [{cell_delimiter, Pos}|Scanned], {Row, Column + 1}, #ctx{ state = inline });
+scan("\":http://"++T, Scanned, {Row, Column} = Pos, #ctx{ state = inline } = Ctx) ->
+ Scanned1 = mark_previous_double_quote(Scanned),
+ scan(T, lists:reverse([{double_quote_close, Pos}, {url, {Row, Column + 2}, lists:reverse("http://")}], Scanned1),
+ {Row, Column + length("\":http://")}, Ctx#ctx{ state = inlink });
+scan("\""++T, Scanned, {Row, Column} = Pos, Ctx) ->
+ scan(T, [{double_quote, Pos, [$"]}|Scanned], {Row, Column + 1}, Ctx#ctx{ state = inline });
+scan("!http://"++T, Scanned, {Row, Column} = Pos, #ctx{ state = inline } = Ctx) ->
scan(T, [{image, Pos, lists:reverse("http://")}|Scanned],
- {Row, Column + length("!http://")}, inimage);
-scan("!"++T, Scanned, {Row, Column}, inimage) ->
- scan(T, Scanned, {Row, Column + 1}, inline);
-scan([H|T], [{url, IPos, Link}|Scanned], {Row, Column}, inimage) ->
- scan(T, [{url, IPos, [H|Link]}|Scanned], {Row, Column + 1}, inimage);
-scan(" "++T, Scanned, {Row, Column} = Pos, inlink) ->
- scan(T, [{text, Pos, " "}|Scanned], {Row, Column + 1}, inline);
-scan(". "++T, Scanned, {Row, Column} = Pos, inlink) ->
- scan(T, [{text, Pos, ". "}|Scanned], {Row, Column + 2}, inline);
-scan([H|T], [{url, HPos, Link}|Scanned], {Row, Column}, inlink) ->
- scan(T, [{url, HPos, [H|Link]}|Scanned], {Row, Column}, inlink);
-scan([H|T], [{text, TPos, Text}|Scanned], {Row, Column}, inline) ->
- scan(T, [{text, TPos, [H|Text]}|Scanned], {Row, Column + 1}, inline);
-scan([H|T], Scanned, {Row, Column} = Pos, inline) ->
- scan(T, [{text, Pos, [H]}|Scanned], {Row, Column + 1}, inline);
-scan([H|T], Scanned, {Row, Column} = Pos, newline) ->
- scan(T, [{text, Pos, [H]}|Scanned], {Row, Column + 1}, inline).
+ {Row, Column + length("!http://")}, Ctx#ctx{ state = inimage });
+scan("!"++T, Scanned, {Row, Column}, #ctx{ state = inimage } = Ctx) ->
+ scan(T, Scanned, {Row, Column + 1}, Ctx#ctx{ state = inline });
+scan([H|T], [{url, IPos, Link}|Scanned], {Row, Column}, #ctx{ state = inimage } = Ctx) ->
+ scan(T, [{url, IPos, [H|Link]}|Scanned], {Row, Column + 1}, Ctx);
+scan(" "++T, Scanned, {Row, Column} = Pos, #ctx{ state = inlink } = Ctx) ->
+ scan(T, [{text, Pos, " "}|Scanned], {Row, Column + 1}, Ctx#ctx{ state = inline });
+scan(". "++T, Scanned, {Row, Column} = Pos, #ctx{ state = inlink } = Ctx) ->
+ scan(T, [{text, Pos, ". "}|Scanned], {Row, Column + 2}, Ctx#ctx{ state = inline });
+scan([H|T], [{url, HPos, Link}|Scanned], {Row, Column}, #ctx{ state = inlink } = Ctx) ->
+ scan(T, [{url, HPos, [H|Link]}|Scanned], {Row, Column}, Ctx);
+scan([H|T], [{text, TPos, Text}|Scanned], {Row, Column}, #ctx{ state = inline } = Ctx) ->
+ scan(T, [{text, TPos, [H|Text]}|Scanned], {Row, Column + 1}, Ctx);
+scan([H|T], Scanned, {Row, Column} = Pos, #ctx{ state = inline } = Ctx) ->
+ scan(T, [{text, Pos, [H]}|Scanned], {Row, Column + 1}, Ctx);
+scan([H|T], Scanned, {Row, Column} = Pos, #ctx{ state = newline } = Ctx) ->
+ scan(T, [{text, Pos, [H]}|Scanned], {Row, Column + 1}, Ctx#ctx{ state = inline }).
+
+mark_previous_double_quote(Scanned) ->  % Retag the nearest earlier double_quote token as double_quote_open.
+ mark_previous_double_quote(Scanned, []).  % Scanned is searched from its head; the scan/4 clauses prepend tokens, so the head is the latest one.
+
+mark_previous_double_quote([], Acc) ->  % No double_quote token found:
+ lists:reverse(Acc);  % rebuild the list in its original order, unchanged.
+mark_previous_double_quote([{double_quote, Pos, Val}|Rest], Acc) ->  % First 3-tuple double_quote reached:
+ lists:reverse([{double_quote_open, Pos, Val}|Acc], Rest);  % retag it, restore the skipped tokens in front of it, keep Rest as the tail.
+mark_previous_double_quote([H|T], Acc) ->  % Any other token:
+ mark_previous_double_quote(T, [H|Acc]).  % set it aside (in reverse) and keep looking.

0 comments on commit 4fadd44

Please sign in to comment.