Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Newer
Older
100644 371 lines (315 sloc) 10.414 kB
a4e794d @joearms starting
authored
1 %% Copyright (c) 2006-2009 Joe Armstrong
2 %% See MIT-LICENSE for licensing information.
3
4 %% title markup language 9 - generalised markup language
5 %% end
6
7 -module(elib1_ml9).
8
9 %% Time-stamp: <2009-11-19 10:47:54 ejoearm>
10
11 -export([doc/0,
12 run/1,
13 batch/1,
14 break_into_paras/1,
15 fetch/2,
16 search/2,
17 parse_file/1,
18 parse_binary/1,
19 parse_string/1,
20 parse_para/1]).
21
22 -import(lists, [member/2, map/2, reverse/1, reverse/2]).
23
24 -define(IN(X,Min,Max), X >= Min, X =< Max).
25 -define(DIGIT(X), X >= $0, X =< $9).
26 -define(LETTER(X), X >= $a, X =< $z ; X >= $A, X =< $Z).
27 -define(UPPER(X), X >= $A, X =< $Z).
28
%% Convenience entry point: runs the processor on the file "doc.ml9".
doc() ->
    DefaultFile = "doc.ml9",
    run(DefaultFile).
31
%% Batch entry point. Expects a one-element list holding the file name
%% as an atom; processes that file with run/1 and then calls init:stop().
batch([FileAtom]) ->
    File = atom_to_list(FileAtom),
    run(File),
    init:stop().
35
%% Parse File and, when parsing succeeds, hand the parse to do_auto_exec/3.
%% On a parse error the error term is reported on stdout and `true' is
%% returned.
run(File) ->
    Outcome = parse_file(File),
    case Outcome of
        {error, Count} ->
            io:format("~w errors in parse~n", [Count]),
            true;
        {ok, Parse} ->
            do_auto_exec(Parse, Parse, File)
    end.
44
%% By the time this runs all includes have been expanded.
%% Walks the leading elements of the parse: every {autoexec, Args} element
%% is passed to autoexec/3 and {comment, _} elements are skipped. The walk
%% stops (returning `true') at the first element of any other kind, or at
%% the end of the list. The second argument is the complete parse and is
%% passed through unchanged.
do_auto_exec([Head | Rest], Parse, File) ->
    case Head of
        {autoexec, Args} ->
            autoexec(Args, Parse, File),
            do_auto_exec(Rest, Parse, File);
        {comment, _} ->
            do_auto_exec(Rest, Parse, File);
        _ ->
            true
    end;
do_auto_exec(_, _, _) ->
    true.
53
%% Look up the `mod' key in Assoc and call Mod:exec(File, Parse) on the
%% module found there. When no `mod' key is present an error line is
%% printed instead.
autoexec(Assoc, Parse, File) ->
    case search(mod, Assoc) of
        error ->
            io:format("*** error autoexec no module~n");
        {ok, Handler} ->
            Handler:exec(File, Parse)
    end.
62
%% Like search/2 but returns the bare value; crashes (in ok/1) when Key
%% is not present in Assoc.
fetch(Key, Assoc) ->
    Found = search(Key, Assoc),
    ok(Found).
65
%% Unwrap an {ok, Value} tuple; any other argument is a function_clause error.
ok({ok, Value}) ->
    Value.
67
%% Find the first {Key, Value} pair in an association list.
%% Returns {ok, Value} for the first match and `error' when the list is
%% exhausted. Elements that are not matching 2-tuples are skipped.
search(_Key, []) ->
    error;
search(Key, [{Key, Value} | _]) ->
    {ok, Value};
search(Key, [_ | Rest]) ->
    search(Key, Rest).
71
%% Read File from disk and parse its contents with parse_binary/1.
%% Crashes with badmatch if the file cannot be read.
parse_file(File) ->
    {ok, Contents} = file:read_file(File),
    parse_binary(Contents).
76
%% Parse the contents of a binary.
%%
%% Returns whatever parse_string/1 returns ({ok, Parse} | {error, N}) when
%% it completes normally, and {error, Reason} when it raises.
%%
%% The previous implementation used the old-style `catch' expression, which
%% turns a throw into a plain return value: a thrown `error' came back as
%% the bare atom `error' rather than an {error, _} tuple, so callers matching
%% on {ok, _} | {error, _} crashed with case_clause. Every exception class is
%% now mapped to {error, Reason}.
parse_binary(B) ->
    try parse_string(binary_to_list(B)) of
        Result ->
            Result
    catch
        throw:Reason ->
            {error, Reason};
        exit:Reason ->
            {error, Reason};
        error:Reason ->
            {error, Reason}
    end.
84
%% Parse a complete document held in a string.
%% The text is split into chunks, each chunk is parsed, includes are
%% expanded, and the number of `error' entries in the result is counted.
%% Returns {ok, Parse} when there are none, otherwise {error, Count}.
parse_string(Str) ->
    Chunks = break_into_chunks(Str, 1, []),
    Parsed = do_includes([parse_chunk(Chunk) || Chunk <- Chunks]),
    case [error || error <- Parsed] of
        [] ->
            {ok, Parsed};
        Failures ->
            {error, length(Failures)}
    end.
98
%% Expand {include, Args} elements of a parse.
%%
%% For each include the `data' entry of Args is expected to be
%% {Line, FileName}; the file name is trimmed, the file is parsed with
%% parse_file/1 and its elements are spliced in place of the include.
%% An include that has no `data' entry, or whose file does not parse,
%% contributes a single `error' element. All other elements are kept as is.
%%
%% Fix: the line number and the error reason used to share the variable N,
%% so a failed include only matched when the two happened to be equal and
%% otherwise crashed with case_clause.
do_includes([{include, Args} | T]) ->
    case search(data, Args) of
        {ok, {_Line, File}} ->
            File1 = trim_filename(File),
            io:format("Including file=~p~n", [File1]),
            case parse_file(File1) of
                {ok, Parse1} ->
                    Parse1 ++ do_includes(T);
                _Failed ->
                    %% any non-ok result counts as one failed element
                    [error | do_includes(T)]
            end;
        error ->
            io:format("no file to include~n"),
            [error | do_includes(T)]
    end;
do_includes([H | T]) ->
    [H | do_includes(T)];
do_includes([]) ->
    [].
118
%% Strip leading spaces, and trailing spaces / CR / LF, from a file name.
trim_filename([$\s | Rest]) ->
    trim_filename(Rest);
trim_filename(Name) ->
    Backwards = lists:reverse(Name),
    lists:reverse(trim_junk(Backwards)).
121
%% Drop leading space, carriage-return and newline characters.
trim_junk([C | Rest]) when C =:= $\s; C =:= $\r; C =:= $\n ->
    trim_junk(Rest);
trim_junk(Other) ->
    Other.
126
%% Parse one chunk {Line, Text} whose text starts with "@".
%% The tag name following the "@" is read with collect_header/1; an optional
%% brace-delimited argument block is tokenised and handed to
%% elib1_ml9_parse_header:parse/1. The result is {Tag, [{data, {Line1, Rest}} | Args]}
%% or the atom `error'.
parse_chunk({N, [$@ | T]}) ->
    case collect_header(T) of
        error ->
            error;
        {ok, Tag, T1} ->
            {Toks, N1, T2} = tokenise(T1, N),
            parse_chunk_args(Tag, {data, {N1, T2}}, Toks)
    end.

%% Build the chunk result from the tag, its data entry and the header tokens.
parse_chunk_args(Tag, Data, []) ->
    %% no argument block after the tag
    {Tag, [Data]};
parse_chunk_args(Tag, Data, Toks) ->
    case elib1_ml9_parse_header:parse(Toks) of
        {ok, Val} ->
            {Tag, [Data | Val]};
        {error, {Ln, M, A}} ->
            io:format("error in tag on line:~w ~s~n",
                      [Ln, M:format_error(A)]),
            io:format("Toks=~p~n", [Toks]),
            error
    end.
150
%% The header follows the "@" and is an atom.
%%
%% Returns {ok, Tag, Rest} where Tag is the atom read from the front of Str
%% and Rest is the remaining text, or `error' when Str does not start with
%% any atom character.
%%
%% collect_atom/2 always returns an atom, so an empty header shows up as the
%% empty atom ''. The previous check compared against [] and could never
%% match, which let "@" followed by a non-atom character through as tag ''.
collect_header(Str) ->
    case collect_atom(Str, []) of
        {'', _} ->
            error;
        {Atom1, T} ->
            {ok, Atom1, T}
    end.
161
%% Tokenise the optional argument block that follows a tag.
%% Peeks past leading spaces/tabs: if the next character is "{" the text is
%% tokenised (starting at nesting level 0) and {Tokens, Line, Rest} is
%% returned; otherwise there are no tokens and the text is returned untouched.
tokenise(Text, Line) ->
    Peeked = skip_blanks(Text),
    case Peeked of
        [${ | _] ->
            tokenise(Text, 0, Line, []);
        _ ->
            {[], Line, Text}
    end.
170
%% tokenise(Text, Depth, Line, Acc) -> {Tokens, Line1, Rest}
%% Turns the characters of an argument block into tokens. Depth is the
%% current brace nesting level, Line the current line number and Acc the
%% tokens seen so far in reverse order. Tokenising ends at the "}" that
%% closes nesting level 1. An unrecognised character is reported and
%% aborts with throw(error).
tokenise([$} | Rest], 1, Line, Acc) ->
    %% the "}" closing the outermost brace finishes the tokens
    {lists:reverse([{'}', Line} | Acc]), Line, Rest};
tokenise([$} | Rest], Depth, Line, Acc) ->
    %% a nested "}" -- go up one level
    tokenise1(Rest, Depth - 1, Line, [{'}', Line} | Acc]);
tokenise([${ | Rest], Depth, Line, Acc) ->
    %% "{" -- go down one level
    tokenise1(Rest, Depth + 1, Line, [{'{', Line} | Acc]);
tokenise([D | Rest], Depth, Line, Acc) when D >= $0, D =< $9 ->
    {Int, Rest1} = collect_integer(Rest, D - $0),
    tokenise1(Rest1, Depth, Line, [{int, Line, Int} | Acc]);
tokenise([$-, D | Rest], Depth, Line, Acc) when D >= $0, D =< $9 ->
    %% negative integer
    {Int, Rest1} = collect_integer(Rest, D - $0),
    tokenise1(Rest1, Depth, Line, [{int, Line, -Int} | Acc]);
tokenise([C | Rest], Depth, Line, Acc) when C >= $a, C =< $z ->
    {Atom, Rest1} = collect_atom(Rest, [C]),
    tokenise1(Rest1, Depth, Line, [{atom, Line, Atom} | Acc]);
tokenise([P | Rest], Depth, Line, Acc)
  when P =:= $[ ; P =:= $] ; P =:= $= ; P =:= $, ->
    %% single-character punctuation: the token tag is the character itself
    tokenise1(Rest, Depth, Line, [{list_to_atom([P]), Line} | Acc]);
tokenise([Q | Rest], Depth, Line, Acc) when Q =:= $" ; Q =:= $' ->
    %% quoted string, terminated by the same quote character
    {Str, Line1, Rest1} = collect_string(Rest, Q, Line, []),
    tokenise1(Rest1, Depth, Line1, [{string, Line, Str} | Acc]);
tokenise([W | Rest], Depth, Line, Acc)
  when W =:= $\t ; W =:= $\s ; W =:= $\r ->
    tokenise1(Rest, Depth, Line, Acc);
tokenise([$\n | Rest], Depth, Line, Acc) ->
    tokenise1(Rest, Depth, Line + 1, Acc);
tokenise([$% | Rest], Depth, Line, Acc) ->
    %% comment: ignore everything up to the end of the line
    Rest1 = skip_to_eol(Rest),
    tokenise1(Rest1, Depth, Line, Acc);
tokenise([Bad | _], _Depth, Line, _Acc) ->
    io:format("unrecognised character \"~c\" in line ~w~n",
              [Bad, Line]),
    throw(error).
218
%% Single point through which every tokeniser step passes (a convenient
%% place to hang trace output); simply continues with tokenise/4.
tokenise1(Rest, Depth, Line, Acc) ->
    tokenise(Rest, Depth, Line, Acc).
222
%% Drop leading space and tab characters.
skip_blanks([C | Rest]) when C =:= $\s; C =:= $\t ->
    skip_blanks(Rest);
skip_blanks(Other) ->
    Other.
226
%% Drop characters up to, but not including, the next newline.
%% Returns the empty remainder when there is no newline.
skip_to_eol(Text) ->
    case Text of
        [$\n | _] ->
            Text;
        [_ | Rest] ->
            skip_to_eol(Rest);
        _ ->
            Text
    end.
230
%% Read decimal digits from the front of the text, extending the value
%% accumulated so far. Returns {Value, Rest}.
collect_integer([D | Rest], Acc) when D >= $0, D =< $9 ->
    collect_integer(Rest, Acc * 10 + (D - $0));
collect_integer(Rest, Acc) ->
    {Acc, Rest}.
235
%% Split the document text into a list of {StartLine, ChunkText} pairs.
%% Line is the line number at which the remaining text starts; Acc holds the
%% chunks found so far in reverse order. The finished list is passed through
%% fix_first_chunk/1.
break_into_chunks([], _Line, Acc) ->
    fix_first_chunk(lists:reverse(Acc));
break_into_chunks(Text, Line, Acc) ->
    {Chunk, NextLine, Remaining} = collect_chunk(Text, Line, []),
    break_into_chunks(Remaining, NextLine, [{Line, Chunk} | Acc]).
241
%% Make sure the first chunk starts with "@". A first chunk that does not
%% has its text replaced by "@comment " (its line number is kept); an empty
%% chunk list is returned unchanged.
fix_first_chunk(Chunks) ->
    case Chunks of
        [{_, [$@ | _]} | _] ->
            Chunks;
        [{Line, _} | Rest] ->
            [{Line, "@comment "} | Rest];
        _ ->
            Chunks
    end.
245
%% Collect one chunk: everything up to a newline (LF or CRLF) that is
%% immediately followed by "@", or up to the end of the text.
%% Returns {ChunkText, NextLine, Rest}; the terminating newline is dropped
%% and Rest starts at the "@".
collect_chunk([], Line, Acc) ->
    {lists:reverse(Acc), Line, []};
collect_chunk([$\r, $\n, $@ | Rest], Line, Acc) ->
    {lists:reverse(Acc), Line + 1, [$@ | Rest]};
collect_chunk([$\n, $@ | Rest], Line, Acc) ->
    {lists:reverse(Acc), Line + 1, [$@ | Rest]};
collect_chunk([$\n | Rest], Line, Acc) ->
    collect_chunk(Rest, Line + 1, [$\n | Acc]);
collect_chunk([C | Rest], Line, Acc) ->
    collect_chunk(Rest, Line, [C | Acc]).
251
%% Read atom characters (a-z, A-Z, 0-9, "-" and "_") from the front of the
%% text. Acc holds the characters already read, in reverse order.
%% Returns {Atom, Rest}; Atom is '' when nothing was collected.
%% NOTE(review): list_to_atom/1 is applied to text taken from the input
%% document, so every distinct name creates a new atom.
collect_atom([C | Rest], Acc)
  when C >= $a, C =< $z;
       C >= $A, C =< $Z;
       C >= $0, C =< $9;
       C =:= $-;
       C =:= $_ ->
    collect_atom(Rest, [C | Acc]);
collect_atom(Rest, Acc) ->
    {list_to_atom(lists:reverse(Acc)), Rest}.
258
%% collect_string(Text, Stop, Line, Acc) -> {String, Line1, Rest}
%% Collect a quoted string up to the stop character Stop.
%% Within a string only the stop character can be quoted (with a backslash);
%% the backslash is kept in the collected text. Newlines inside the string
%% advance the line counter. Reaching the end of the text before the stop
%% character prints a warning and returns what was collected.
collect_string([], Stop, Line, Acc) ->
    io:format("** Warning missing stop character (~c) at end of string in line:~w~n",
              [Stop, Line]),
    {lists:reverse(Acc), Line, []};
collect_string([Stop | Rest], Stop, Line, Acc) ->
    {lists:reverse(Acc), Line, Rest};
collect_string([$\\, Stop | Rest], Stop, Line, Acc) ->
    collect_string(Rest, Stop, Line, [Stop, $\\ | Acc]);
collect_string([$\n | Rest], Stop, Line, Acc) ->
    collect_string(Rest, Stop, Line + 1, [$\n | Acc]);
collect_string([C | Rest], Stop, Line, Acc) ->
    collect_string(Rest, Stop, Line, [C | Acc]).
269
%% Inline parser: split the text into paragraphs and parse the inline
%% elements of each one. Returns one element list per paragraph.
parse_para(Str) ->
    [parse_para_content(Para, []) || Para <- break_into_paras(Str)].
274
275
%% Split text into paragraphs separated by blank lines, dropping empty
%% paragraphs and one leading newline from each remaining paragraph.
break_into_paras(S) ->
    RawParas = break_into_paras0(S),
    cleanup(RawParas).
278
%% Remove empty paragraphs and trim/1 every remaining one.
cleanup(Paras) ->
    [trim(Para) || Para <- Paras, Para =/= []].
282
%% Remove a single leading newline, if there is one.
trim(Text) ->
    case Text of
        [$\n | Rest] -> Rest;
        _ -> Text
    end.
285
286
%% Repeatedly peel one paragraph off the front of the text with
%% collect_para/2 until the text is exhausted. Returns the raw paragraphs
%% in document order.
break_into_paras0(Str) ->
    break_into_paras0(Str, []).

%% Accumulator version: Acc holds the paragraphs found so far, newest first.
break_into_paras0([], Acc) ->
    lists:reverse(Acc);
break_into_paras0(Str, Acc) ->
    {Para, Remaining} = collect_para(Str, []),
    break_into_paras0(Remaining, [Para | Acc]).
292
%% Collect one paragraph. A paragraph ends at a newline that is followed by
%% a blank line (only white space up to the next newline or the end of the
%% text), or at the end of the text. Returns {Paragraph, Rest}; the newline
%% that ended the paragraph is dropped and Rest starts right after it.
collect_para([], Acc) ->
    {lists:reverse(Acc), []};
collect_para([$\n | Rest], Acc) ->
    case all_blank_before_next_nl(Rest) of
        false ->
            collect_para(Rest, [$\n | Acc]);
        true ->
            {lists:reverse(Acc), Rest}
    end;
collect_para([C | Rest], Acc) ->
    collect_para(Rest, [C | Acc]).
304
%% True when only spaces, tabs and carriage returns occur before the next
%% newline or the end of the text.
all_blank_before_next_nl([]) ->
    true;
all_blank_before_next_nl([$\n | _]) ->
    true;
all_blank_before_next_nl([C | Rest]) when C =:= $\s; C =:= $\r; C =:= $\t ->
    all_blank_before_next_nl(Rest);
all_blank_before_next_nl(_) ->
    false.
311
%% Parse the contents of one paragraph.
%% This only isolates the inline elements: text between four quotes becomes
%% {code, S}, between three quotes {bold, S}, between two quotes {italic, S},
%% between tildes {link, S}; everything else becomes {str, S} (with one
%% leading newline removed). Acc holds the elements found so far, newest
%% first. The longer quote markers must be tried before the shorter ones.
parse_para_content([], Acc) ->
    lists:reverse(Acc);
parse_para_content([$', $', $', $' | Rest], Acc) ->
    {Code, Rest1} = collect_code(Rest, []),
    parse_para_content(Rest1, [{code, Code} | Acc]);
parse_para_content([$', $', $' | Rest], Acc) ->
    {Bold, Rest1} = collect_bold(Rest, []),
    parse_para_content(Rest1, [{bold, Bold} | Acc]);
parse_para_content([$', $' | Rest], Acc) ->
    {Italic, Rest1} = collect_italic(Rest, []),
    parse_para_content(Rest1, [{italic, Italic} | Acc]);
parse_para_content([$~ | Rest], Acc) ->
    {Target, Rest1} = collect_link(Rest, []),
    parse_para_content(Rest1, [{link, Target} | Acc]);
parse_para_content([C | Rest], Acc) ->
    %% plain text: always consumes at least the character C
    {Text, Rest1} = collect_string(Rest, [C]),
    parse_para_content(Rest1, [{str, remove_nl(Text)} | Acc]).
332
%% Remove a single leading newline, if there is one.
remove_nl(Text) ->
    case Text of
        [$\n | Rest] -> Rest;
        _ -> Text
    end.
335
%% Collect plain text up to the start of the next inline element (as
%% decided by start_of_inline/1) or the end of the text. Acc holds the
%% characters already collected, in reverse order. Returns {Text, Rest}.
collect_string([], Acc) ->
    {lists:reverse(Acc), []};
collect_string([C | Rest] = Text, Acc) ->
    case start_of_inline(Text) of
        false ->
            collect_string(Rest, [C | Acc]);
        true ->
            {lists:reverse(Acc), Text}
    end.
345
346
%% True when the text starts with an inline marker: "~", "*", or two or
%% more single quotes.
start_of_inline([$~ | _]) ->
    true;
start_of_inline([$* | _]) ->
    true;
start_of_inline([$', $' | _]) ->
    %% also covers the three- and four-quote markers
    true;
start_of_inline(_) ->
    false.
353
%% Collect text up to the closing three-quote marker (which is consumed)
%% or the end of the text. Returns {Text, Rest}.
collect_bold([], Acc) ->
    {lists:reverse(Acc), []};
collect_bold([$', $', $' | Rest], Acc) ->
    {lists:reverse(Acc), Rest};
collect_bold([C | Rest], Acc) ->
    collect_bold(Rest, [C | Acc]).
357
%% Collect text up to the closing four-quote marker (which is consumed)
%% or the end of the text. Returns {Text, Rest}.
collect_code([], Acc) ->
    {lists:reverse(Acc), []};
collect_code([$', $', $', $' | Rest], Acc) ->
    {lists:reverse(Acc), Rest};
collect_code([C | Rest], Acc) ->
    collect_code(Rest, [C | Acc]).
361
%% Collect text up to the closing two-quote marker (which is consumed)
%% or the end of the text. Returns {Text, Rest}.
collect_italic([], Acc) ->
    {lists:reverse(Acc), []};
collect_italic([$', $' | Rest], Acc) ->
    {lists:reverse(Acc), Rest};
collect_italic([C | Rest], Acc) ->
    collect_italic(Rest, [C | Acc]).
365
%% Collect a link target up to the closing "~" (which is consumed) or the
%% end of the text. A backslash quotes the following character: the
%% backslash is dropped and the character is kept. Returns {Text, Rest}.
collect_link([], Acc) ->
    {lists:reverse(Acc), []};
collect_link([$\\, Quoted | Rest], Acc) ->
    collect_link(Rest, [Quoted | Acc]);
collect_link([$~ | Rest], Acc) ->
    {lists:reverse(Acc), Rest};
collect_link([C | Rest], Acc) ->
    collect_link(Rest, [C | Acc]).
370
Something went wrong with that request. Please try again.