ANSI C parser + cpp for Erlang

1 parent b822a5d commit c7b41cd388824c6b3f08c6eef8e10221f91c3c2a tonyrog committed Aug 18, 2006
1 lib/bic/ebin/.cvsignore
@@ -0,0 +1 @@
+*
184 lib/bic/include/bic.hrl
@@ -0,0 +1,184 @@
+-record('ID',
+ {
+ line,
+ name
+ }).
+
+-record('TYPEID',
+ {
+ line,
+ name
+ }).
+
+-record('CONSTANT',
+ {
+ line,
+ base, %% 8,10,16 | char | float
+ value %% string rep of value
+ }).
+
+-record('UNARY',
+ {
+ line,
+ op,
+ arg
+ }).
+
+-record('EXPR',
+ {
+ line,
+ op,
+ arg1,
+ arg2
+ }).
+
+-record('CALL',
+ {
+ line,
+ func,
+ args
+ }).
+
+%% cond ? then : else (FIXME? GNU extension allows omitting the middle operand: cond ?: else)
+-record('IFEXPR',
+ {
+ line,
+ test,
+ then,
+ else
+ }).
+
+-record('ASSIGN',
+ {
+ line,
+ op,
+ lhs,
+ rhs
+ }).
+
+%% Function declaration
+-record('FUNCTION',
+ {
+ line, %% line number
+ name,
+ storage, %% list of specifiers
+ type, %% return type
+ params, %% list of parameters [#decl]
+ body %% function body
+ }).
+
+%% variable & element declarations
+-record('DECL',
+ {
+ line, %% line number
+ name, %% optional identifier
+ type=[], %% type (specifier list)
+ value %% init value / bit field size - optional
+ }).
+
+-record('STRUCT',
+ {
+ line,
+ name,
+ elems
+ }).
+
+-record('UNION',
+ {
+ line,
+ name,
+ elems
+ }).
+
+-record('ENUM',
+ {
+ line,
+ name, %% string() | undefined
+ elems %% [{id,value|undefined}]
+ }).
+
+
+
+-record('FOR',
+ {
+ line, %% line number
+ init,
+ test,
+ update,
+ body
+ }).
+
+-record('WHILE',
+ {
+ line, %% line number
+ test,
+ body
+ }).
+
+-record('DO',
+ {
+ line, %% line number
+ test,
+ body
+ }).
+
+-record('IF',
+ {
+ line,
+ test,
+ then,
+ else
+ }).
+
+-record('SWITCH',
+ {
+ line,
+ expr,
+ body
+ }).
+
+-record('CASE',
+ {
+ line,
+ expr,
+ code
+ }).
+
+-record('DEFAULT',
+ {
+ line,
+ code
+ }).
+
+-record('LABEL',
+ {
+ line,
+ name,
+ code
+ }).
+
+
+-record('GOTO',
+ {
+ line,
+ label
+ }).
+
+-record('CONTINUE',
+ {
+ line
+ }).
+
+-record('BREAK',
+ {
+ line
+ }).
+
+-record('RETURN',
+ {
+ line,
+ expr
+ }).
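+
+%% A sketch (illustrative, not produced by running the parser) of how
+%% these records compose: the C statement `if (x > 0) return 1;`
+%% would be represented roughly as:
+%%
+%%   #'IF'{ line = 1,
+%%          test = #'EXPR'{ line = 1, op = '>',
+%%                          arg1 = #'ID'{line=1, name="x"},
+%%                          arg2 = #'CONSTANT'{line=1, base=10, value="0"}},
+%%          then = #'RETURN'{ line = 1,
+%%                            expr = #'CONSTANT'{line=1, base=10, value="1"}},
+%%          else = undefined }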
+
+
+
50 lib/bic/src/Makefile
@@ -0,0 +1,50 @@
+
+MODULES = \
+ bic_leex \
+ bic \
+ bic_cpp \
+ bic_scan \
+ bic_parse
+
+ERL_FLAGS = \
+ -I ../include -W0
+
+LEEX = -pa ../ebin
+
+override ERLC_FLAGS += $(ERL_FLAGS)
+
+debug: ERLC_FLAGS += +debug_info -Ddebug -W
+
+OBJS = $(MODULES:%=../ebin/%.beam)
+
+all: $(OBJS)
+
+debug: all
+
+depend:
+ edep -MM -o ../ebin $(ERLC_FLAGS) $(MODULES:%=%.erl) > depend.mk
+
+dialyze:
+ dialyzer --src -o dia.out $(ERLC_FLAGS) -c $(MODULES:%=%.erl)
+
+clean:
+ rm -f $(OBJS) bic_scan.erl bic_parse.erl
+
+
+-include depend.mk
+
+
+../ebin/bic_scan.beam: bic_scan.erl
+ erlc -o ../ebin $(LEEX) $(ERLC_FLAGS) $<
+
+bic_scan.erl: bic_scan.xrl
+ -erl -noshell $(LEEX) $(ERL_FLAGS) -run bic_leex gen $< $@ -run init stop
+
+../ebin/bic_parse.beam: bic_parse.erl
+
+bic_parse.erl: bic_parse.yrl
+ erlc $<
+
+../ebin/%.beam: %.erl
+ erlc -o ../ebin $(ERLC_FLAGS) $<
+
54 lib/bic/src/bic.erl
@@ -0,0 +1,54 @@
+%%% File : bic.erl
+%%% Author : Tony Rogvall <tony@iMac.local>
+%%% Description : BEAM interpreted C(ode)
+%%% Created : 27 Dec 2005 by Tony Rogvall <tony@iMac.local>
+
+-module(bic).
+
+-compile(export_all).
+
+-import(lists, [map/2]).
+
+-include("../include/bic.hrl").
+
+cpp(File) ->
+ bic_cpp:file(File).
+
+file(File) ->
+ file(File,[]).
+
+file(File,Env) ->
+ case bic_cpp:open(File) of
+ {ok,Fd} ->
+ bic_scan:init(), %% set up some dictionary state
+ bic_parse:init(), %% set up some dictionary state
+ Res = (catch bic_parse:parse_and_scan({bic_scan, scan, [Fd]})),
+ bic_cpp:close(Fd),
+ case Res of
+ {error,{{Fn,Ln},Mod,Message}} when integer(Ln) ->
+ io:format("~s:~w: ~s\n",
+ [Fn,Ln,Mod:format_error(Message)]),
+ {error,parse_error};
+ {error,{Ln,Mod,Message}} when integer(Ln) ->
+ io:format("~s:~w: ~s\n",
+ [File,Ln,Mod:format_error(Message)]),
+ {error,parse_error};
+ {ok,List} when list(List) ->
+ map(fun(D) ->
+ io:format("~p\n", [D])
+ end, List),
+ List;
+ Other ->
+ Other
+ end;
+ Error ->
+ Error
+ end.
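+
+%% Sketch of expected shell usage (output shape depends on the input
+%% file; the file name here is hypothetical):
+%%   1> bic:file("test.c").
+%% Each parsed top-level declaration is printed, and the list of
+%% bic.hrl records is returned.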
+
+
+
+
+
+
+
+
1,109 lib/bic/src/bic_cpp.erl
@@ -0,0 +1,1109 @@
+%%% File : bic_cpp.erl
+%%% Author : Tony Rogvall <tony@iMac.local>
+%%% Description : CPP
+%%% Created : 3 Jan 2006 by Tony Rogvall <tony@iMac.local>
+
+-module(bic_cpp).
+
+-compile(export_all).
+
+-export([init/3,open/1,close/1,read/1]).
+-export([file/1]).
+-export([cline/1, cfile/1, cvalue/2]).
+
+-import(lists, [foreach/2, reverse/1, map/2, foldl/3]).
+
+-include("bic.hrl").
+
+-define(debug, true).
+
+-ifdef(debug).
+-define(dbg(F,A), io:format((F),(A))).
+-else.
+-define(dbg(F,A), ok).
+-endif.
+
+-record(save,
+ {
+ fd, %% current fd
+ file, %% real file name
+ line, %% real line number
+ if_stack, %% current if stack
+ pwd, %% current working directory
+ buf, %% stored data from file
+ defs %% "some" saved defs
+ }).
+
+-record(s,
+ {
+ fd, %% current fd
+ file, %% real file name
+ line, %% real line number
+ if_stack=[], %% {Tag,0|1|2,Ln}
+ inc_stack=[], %% #save {}
+ pwd, %% current working directory
+ include=[], %% search path for #include <foo.h>
+ qinclude=[], %% search path for #include "foo.h"
+ preds, %% dict of assertions {Pred,Answer}
+ defs %% dict of defines {Name,Value}
+ }).
+
+-ifdef(debug).
+-define(CHUNK_SIZE, 1).
+-else.
+-define(CHUNK_SIZE, 1024).
+-endif.
+
+%% test to cpp a file and print it
+file(File) ->
+ case open(File) of
+ {ok,PFd} ->
+ fd(PFd),
+ close(PFd),
+ ok;
+ {error,Err} ->
+ {error,Err}
+ end.
+
+fd(PFd) ->
+ case read(PFd) of
+ eof ->
+ ok;
+ Line ->
+ io:format("~s", [Line]),
+ fd(PFd)
+ end.
+
+open(File) ->
+ open(File, []).
+
+open(File, Opts) ->
+ Pid = spawn_link(?MODULE, init, [self(),File,Opts]),
+ Mon = mon(Pid),
+ receive
+ {'DOWN',Mon,_,_,Reason} ->
+ {error, Reason};
+ {Pid,Result} ->
+ demon(Mon),
+ Result
+ end.
+
+close(PFd) when is_pid(PFd) ->
+ call(PFd, close).
+
+%% return Line of characters | eof
+read(PFd) when is_pid(PFd) ->
+ call(PFd, read).
+
+cline(PFd) when is_pid(PFd) ->
+ cvalue(PFd, "__LINE__").
+
+cfile(PFd) when is_pid(PFd) ->
+ cvalue(PFd,"__FILE__").
+
+cvalue(PFd, Var) ->
+ call(PFd, {value, Var}).
+
+call(PFd, Request) ->
+ Ref = mon(PFd),
+ PFd ! {call,self(),Ref,Request},
+ receive
+ {'DOWN',Ref,_,_,Reason} ->
+ {error, Reason};
+ {Ref, Reply} ->
+ demon(Ref),
+ Reply
+ end.
+
+mon(Pid) ->
+ erlang:monitor(process, Pid).
+
+demon(Ref) ->
+ Res = erlang:demonitor(Ref),
+ receive
+ {'DOWN',Ref,_,_,_} ->
+ Res
+ after 0 ->
+ Res
+ end.
+%%
+%% Initial processing
+%%
+init(Pid,File,Opts) ->
+ ?dbg("OPEN: ~s\n", [File]),
+ case file:open(File, [read]) of
+ {ok,Fd} ->
+ Pid ! {self(),{ok,self()}},
+ init_loop(Pid, Fd, File, Opts);
+ Error ->
+ Pid ! {self(), Error}
+ end.
+
+init_loop(Pid, Fd, File, UserOpts) ->
+ Defs = init_defs(default_opts(File)++UserOpts),
+ Preds = init_preds(UserOpts),
+ Include = init_include(UserOpts,["/usr/local/include", "/usr/include"]),
+ QInclude = init_qinclude(UserOpts,[]),
+ S0 = #s { file = File,
+ fd = Fd,
+ line = 1,
+ pwd = ".",
+ include = Include,
+ qinclude = QInclude,
+ defs = Defs,
+ preds = Preds
+ },
+ loop(Pid, S0, [], [auto_line(1,File)]).
+
+
+init_include([{include,Path} | Opts], Paths) when list(Path) ->
+ init_include(Opts, Paths ++ [Path]);
+init_include([{include,Path} | Opts], Paths) when atom(Path) ->
+ init_include(Opts, Paths ++ [atom_to_list(Path)]);
+init_include([_|Opts], Paths) ->
+ init_include(Opts, Paths);
+init_include([], Paths) ->
+ Paths.
+
+init_qinclude([{qinclude,Path} | Opts], Paths) when list(Path) ->
+ init_qinclude(Opts, Paths ++ [Path]);
+init_qinclude([{qinclude,Path} | Opts], Paths) when atom(Path) ->
+ init_qinclude(Opts, Paths ++ [atom_to_list(Path)]);
+init_qinclude([_|Opts], Paths) ->
+ init_qinclude(Opts, Paths);
+init_qinclude([], Paths) ->
+ Paths.
+
+init_defs(Opts) ->
+ foldl(fun({define,N,V},D) ->
+ dict:store(N, V, D);
+ (_, D) ->
+ D
+ end, dict:new(), Opts).
+
+init_preds(Opts) ->
+ foldl(fun({assert,P,V},D) ->
+ dict:store({P,V},true,D);
+ (_,D) ->
+ D
+ end, dict:new(), Opts).
+
+
+loop(Pid, S, Cs, LBuf) ->
+ receive
+ {call,From,Ref,read} ->
+ case LBuf of
+ [] ->
+ case iread(S,Cs) of
+ {S1,{[],0,eof}} ->
+ From ! {Ref, eof},
+ loop(Pid,S1,[],LBuf);
+ {S1,{[],0,Err={error,_}}} ->
+ From ! {Ref, Err},
+ loop(Pid,S1,[],LBuf);
+ {S1,{Line,NL,Cs1}} ->
+ From ! {Ref, Line},
+ loop(Pid,S1,Cs1,LBuf)
+ end;
+ [Line|LBuf1] ->
+ From ! {Ref, Line},
+ loop(Pid, S, Cs, LBuf1)
+ end;
+ {call,From,Ref,close} ->
+ %% close all files in save state and the main one
+ foreach(fun(Save) ->
+ ?dbg("CLOSE: ~s\n", [Save#save.file]),
+ file:close(Save#save.fd)
+ end, S#s.inc_stack),
+ ?dbg("CLOSE: ~s\n", [S#s.file]),
+ file:close(S#s.fd),
+ From ! {Ref, ok},
+ ok;
+ {call,From,Ref,{value,Var}} ->
+ From ! {Ref, value(S,Var)},
+ loop(Pid,S,Cs,LBuf)
+ end.
+
+default_opts(File) ->
+ {Dy,Dm,Dd} = date(),
+ {Th,Tm,Ts} = time(),
+ Mon = case Dm of
+ 1->"Jan"; 2->"Feb"; 3->"Mar"; 4->"Apr"; 5->"May"; 6->"Jun";
+ 7->"Jul"; 8->"Aug"; 9->"Sep"; 10->"Oct"; 11->"Nov"; 12->"Dec"
+ end,
+ Time=lists:flatten(io_lib:format("~2..0w:~2..0w:~2..0w",[Th,Tm,Ts])),
+ Date=lists:flatten(io_lib:format("~s ~2w ~4w",[Mon,Dd,Dy])),
+ [{define,"__FILE__", File}, %% may be #undef
+ {define,"__LINE__", 1}, %% may be #undef
+ {define,"__INCLUDE_LEVEL__",0}, %% read only can not be #undef
+ {define,"__DATE__", Date},
+ {define,"__TIME__",Time}
+ ].
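+
+%% For example, a file "foo.c" opened on Aug 18 2006 at 12:00:00 would
+%% start out with:
+%%   __FILE__ = "foo.c", __LINE__ = 1, __INCLUDE_LEVEL__ = 0,
+%%   __DATE__ = "Aug 18 2006", __TIME__ = "12:00:00"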
+
+%%
+%% Read a continued line
+%% Take care of \ and /*comments */ and // comments
+%%
+
+-define(FALSE, 0).
+-define(TRUE, 1).
+-define(SKIP, 2).
+
+iread(S,Cs) ->
+ iread(S,0,Cs).
+
+iread(S0,NL,Cs) ->
+ {S,Read} = iread(S0,Cs,false,NL,[]),
+%% ?dbg("Read: ~p\n", [Read]),
+ case Read of
+ {{D,Bool},NL1,Cs1} when D=='#if'; D=='#ifdef'; D=='#ifndef'->
+ case peek_if(S) of
+ {Tag,V,Ln} ->
+ if V == ?FALSE ->
+ S1 = push_if(S, D,?SKIP,S#s.line),
+ iread(S1,NL1,Cs1);
+ V == ?TRUE ->
+ S1 = push_if(S, D,Bool,S#s.line),
+ iread(S1,NL1,Cs1);
+ V == ?SKIP ->
+ S1 = push_if(S,D,?SKIP,S#s.line),
+ iread(S1,NL1,Cs1)
+ end;
+ empty ->
+ S1 = push_if(S,D,Bool,S#s.line),
+ iread(S1,NL1,Cs1)
+ end;
+
+ {{'#elif',?TRUE},NL1,Cs1} ->
+ case peek_if(S) of
+ {Tag,V,Ln} ->
+ if Tag == '#ifdef'; Tag == '#ifndef'; Tag=='#else' ->
+ error(S, "#elif missing #if", []),
+ iread(S,NL1,Cs1);
+ V == ?SKIP ->
+ iread(S,NL1,Cs1);
+ V == ?TRUE -> %% skip, already processed
+ S1 = mod_if(S,Tag,?SKIP,Ln),
+ iread(S1,NL1,Cs1);
+ V == ?FALSE -> %% take this clause
+ S1 = mod_if(S, '#elif',?TRUE,S#s.line),
+ iread(S1,NL1,Cs1)
+ end;
+ empty ->
+ error(S, "#elif missing #if", []),
+ iread(S,NL1,Cs1)
+ end;
+
+ {{'#elif',?FALSE},NL1,Cs1} ->
+ case peek_if(S) of
+ {Tag,V,Ln} ->
+ if Tag == '#ifdef'; Tag == '#ifndef'; Tag=='#else' ->
+ error(S, "#elif missing #if", []),
+ iread(S,NL1,Cs1);
+ V == ?SKIP ->
+ iread(S,NL1,Cs1);
+ V == ?TRUE -> %% skip, already processed
+ S1 = mod_if(S,Tag,?SKIP,Ln),
+ iread(S1,NL1,Cs1);
+ V == ?FALSE -> %% ignore this clause
+ S1 = mod_if(S,'#elif',?FALSE,S#s.line),
+ iread(S1,NL1,Cs1)
+ end;
+ empty ->
+ error(S, "#elif missing #if", []),
+ iread(S,NL1,Cs1)
+ end;
+
+ {'#else',NL1,Cs1} ->
+ case peek_if(S) of
+ {'#else',V,Ln} ->
+ error(S, "#else missing #if/#ifdef", []),
+ iread(S,NL1,Cs1);
+
+ {Tag,V,Ln} ->
+ if V == ?SKIP ->
+ iread(S,NL1,Cs1);
+ V == ?TRUE ->
+ S1 = mod_if(S,Tag,?SKIP,Ln),
+ iread(S1,NL1,Cs1);
+ V == ?FALSE ->
+ S1 = mod_if(S,'#else',?TRUE,S#s.line),
+ iread(S1,NL1,Cs1)
+ end;
+ empty ->
+ error(S, "#else missing #if", []),
+ iread(S,NL1,Cs1)
+ end;
+
+ {'#endif',NL1,Cs1} ->
+ case pop_if(S) of
+ {S1,true} ->
+ iread(S1,NL1,Cs1);
+ {S1,false} ->
+ error(S, "#endif missing #if/#ifdef", []),
+ iread(S1,NL1,Cs1)
+ end;
+
+ {[],_NL,eof} ->
+ case restore(S) of
+ {S1,empty} ->
+ {S1,{[],0,eof}};
+ {S1,Cs1} ->
+ ?dbg("CLOSE: ~s\n", [S#s.file]),
+ file:close(S#s.fd),
+ {S1,{auto_line(S1#s.line,S1#s.file),0,Cs1}}
+ end;
+
+ {Data,NL1,Cs1} when list(Data) ->
+ case peek_if(S) of
+ {_,?FALSE,_} ->
+ iread(S,NL1,Cs1);
+ {_,?SKIP,_} ->
+ iread(S,NL1,Cs1);
+ {_,?TRUE,_} ->
+ {S,{Data,NL1,Cs1}};
+ empty ->
+ {S,{Data,NL1,Cs1}}
+ end
+ end.
+
+%%
+%% Scan a line and remove comments
+%%
+iread(S,[$/,$\\|Cs],Skip,NL,Buf) ->
+ iread_bsl(S,Cs,[$/],[],Skip,NL,Buf);
+iread(S,[$\\|Cs],Skip,NL,Buf) ->
+ iread_bsl(S,Cs,[],[],Skip,NL,Buf);
+iread(S,[$/,$/|Cs],Skip,NL,Buf) ->
+ {S1,{Ln,NL1,Cs1}} = iread(S,Cs,true,NL,[]),
+ process(S1,Skip,reverse([$\n,$\s|Buf]),NL1-1,Cs1);
+iread(S,[$/,$*|Cs],Skip,NL,Buf) ->
+ Cs1 = comment(S,NL,Cs),
+ iread(S,Cs1,Skip,0,Buf);
+iread(S,Cs=[$/],Skip,NL,Buf) ->
+ iread_more(S,Cs,Skip,NL,Buf);
+iread(S,[$\n|Cs],Skip,NL,Buf) ->
+ S1 = update_line(S,1),
+ process(S1,Skip,reverse([$\n|Buf]),NL,Cs);
+iread(S,[C|Cs],Skip,NL,Buf) ->
+ iread(S,Cs,Skip,NL,[C|Buf]);
+iread(S,[],Skip,NL,Buf) ->
+ iread_more(S,[],Skip,NL,Buf).
+
+%% scan after \ to look for WS*\n
+iread_bsl(S,[C|Cs],Hs,Acc,Skip,NL,Buf) ->
+ if C == $\s; C == $\t; C == $\r ->
+ iread_bsl(S,Cs,Hs,[$\s|Acc],Skip,NL,Buf);
+ C == $\n ->
+ iread(S,Hs++Cs,Skip,NL+1,Buf);
+ true ->
+ iread(S,[C|Cs],Skip,NL,Acc++Hs++[$\\|Buf])
+ end;
+iread_bsl(S,[],Hs,Acc,Skip,NL,Buf) ->
+ case file:read(S#s.fd, ?CHUNK_SIZE) of
+ eof -> process(S,Skip,reverse(Buf),NL,eof);
+ Err={error,_} -> process(S,Skip,reverse(Buf),NL,[]);
+ {ok,Chars} -> iread_bsl(S,Chars,Hs,Acc,Skip,NL,Buf)
+ end.
+
+iread_more(S,[],_,NL,eof) ->
+ {S,{[],0,eof}};
+iread_more(S,Cs,Skip,NL,Buf) ->
+ case file:read(S#s.fd, ?CHUNK_SIZE) of
+ eof ->
+ process(S,Skip,reverse(Buf),NL,eof);
+ Err={error,_} ->
+ process(S,Skip,reverse(Buf),NL,Err);
+ {ok,Chars} ->
+ iread(S,Cs++Chars,Skip,NL,Buf)
+ end.
+
+
+%% Skip a block comment ( after reading /* )
+comment(S, Cs) ->
+ comment(S, 0, Cs).
+
+comment(_S,NL, [$*,$/ | Cs]) -> [$\s|nl(NL)]++Cs;
+comment(S,NL,[$*,$\\|Cs]) -> comment_bsl(S,NL,Cs);
+comment(S,NL,Cs=[$*]) -> comment_more(S,NL,Cs);
+comment(S,NL,[$\n|Cs]) -> comment(S,NL+1, Cs);
+comment(S,NL,[_C|Cs]) -> comment(S, NL, Cs);
+comment(S,NL,[]) -> comment_more(S,NL,[]).
+
+comment_more(S,NL,Cs) ->
+ case file:read(S#s.fd, ?CHUNK_SIZE) of
+ eof -> [$\s|nl(NL)]++Cs;
+ {error,_} -> [$\s|nl(NL)]++Cs;
+ {ok,Chars} -> comment(S,NL,Cs++Chars)
+ end.
+
+nl(NL) ->
+ lists:duplicate(NL, $\n).
+
+%% handle the case:
+%% *\
+%% /
+comment_bsl(S,NL,[$\s|Cs]) -> comment_bsl(S,NL,Cs);
+comment_bsl(S,NL,[$\t|Cs]) -> comment_bsl(S,NL,Cs);
+comment_bsl(S,NL,[$\r|Cs]) -> comment_bsl(S,NL,Cs);
+comment_bsl(S,NL,[$\n|Cs]) -> comment(S,NL+1,[$*|Cs]);
+comment_bsl(S,NL,[C|Cs]) -> comment(S,NL,[C|Cs]);
+comment_bsl(S,NL,[]) ->
+ case file:read(S#s.fd, ?CHUNK_SIZE) of
+ eof -> [$\s|nl(NL)];
+ {error,_} -> [$\s|nl(NL)];
+ {ok,Chars} -> comment_bsl(S,NL,Chars)
+ end.
+
+process(S,false,Line,NL,Cs) ->
+ %% io:format("process: line=~p, cs=~p\n", [Line,Cs]),
+ case scan_one(Line,true,true) of
+ {[{char,"#"}], LCs} ->
+ process_cpp(S,LCs,NL,Cs);
+ {_, _} ->
+ ELine = expand(S, Line),
+ %% io:format("expand: line=~p\n", [ELine]),
+ {S,{ELine,NL,Cs}}
+ end;
+process(S,true,Line,NL,Cs) ->
+ %% io:format("process:skip: line=~p, cs=~p\n", [Line,Cs]),
+ {S,{[],NL+1,Cs}}.
+
+
+%% process characters after seen #
+process_cpp(S,LCs, NL, Cs) ->
+ case scan_one(LCs,true,true) of
+ {[],[]} -> %% a null directive produces a single line with #
+ {S,{"#\n",NL,Cs}};
+ {[{id,"ident"}],_LCs1} -> %% ignored
+ {S,{[],NL+1,Cs}};
+ {[{id,"sccs"}],_LCs1} -> %% ignored
+ {S,{[],NL+1,Cs}};
+ {[{id,"pragma"}],LCs1} -> %% ignored for now
+ {S,{[],NL+1,Cs}};
+ {[{id,"else"}],_} -> %% ignore trailing data?
+ {S,{'#else',NL+1,Cs}};
+ {[{id,"endif"}],_} -> %% ignore trailing data?
+ {S,{'#endif',NL+1,Cs}};
+ {[{id,"define"}],LCs1} ->
+ %% #define Name Value
+ %% | #define Name(A1,..An) Value
+ case scan_one(LCs1,true,true) of
+ {[{id,Var}],LCs2="("++_} ->
+ S1 = define_macro(S,Var,LCs2),
+ {S1,{[],NL+1,Cs}};
+ {[{id,Var}], LCs2} ->
+ S1 = define(S,Var,LCs2),
+ {S1,{[],NL+1,Cs}};
+ {[],_} ->
+ error(S, "no macro name given in #define directive", []),
+ {S,{[],NL+1,Cs}};
+ {_, _} ->
+ error(S, "bad macro name given in #define directive", []),
+ {S,{[],NL+1,Cs}}
+ end;
+ {[{id,"undef"}],LCs1} ->
+ %% #undef <id>
+ case scan_all(LCs1,true,true) of
+ {[{id,Var}|_],_} ->
+ S1 = undef(S,Var),
+ {S1,{[],NL+1,Cs}};
+ {_, _} ->
+ error(S, "no macro name given in #undef directive", []),
+ {S,{[],NL+1,Cs}}
+ end;
+ {[{id,"assert"}],LCs1} ->
+ %% #assert predicate value
+ case scan_all(LCs1,true,true) of
+ {[{id,Pred},{id,Answer}],_} ->
+ S1 = assert(S, Pred, Answer),
+ {S1,{[],NL+1,Cs}};
+ {_, _} ->
+ error(S, "bad arguments to #assert directive", []),
+ {S,{[],NL+1,Cs}}
+ end;
+ {[{id,"unassert"}],LCs1} ->
+ %% #unassert predicate [value]
+ case scan_all(LCs1,true,true) of
+ {[{id,Pred},{id,Answer}],_} ->
+ S1 = unassert(S, Pred, Answer),
+ {S1,{[],NL+1,Cs}};
+ {[{id,Pred}],_} ->
+ S1 = unassert(S,Pred),
+ {S1,{[],NL+1,Cs}};
+ {_, _} ->
+ error(S, "bad arguments to #unassert directive", []),
+ {S,{[],NL+1,Cs}}
+ end;
+ {[{id,"if"}],LCs1} ->
+ cpp_if(S,LCs1,'#if',NL,Cs);
+ {[{id,"elif"}],LCs1} ->
+ cpp_if(S,LCs1,'#elif',NL,Cs);
+ {[{id,"ifdef"}],LCs1} ->
+ %% #ifdef <id>
+ %% [text]
+ %% [#else
+ %% text]
+ %% #endif
+ %%
+ case scan_all(LCs1,true,true) of
+ {[{id,Var} | _],_} ->
+ {S,{{'#ifdef', defined(S,Var)},NL+1,Cs}};
+ {_,_} ->
+ error(S,"no macro name given in #ifdef directive",[]),
+ {S,{{'#ifdef', 0},NL+1,Cs}}
+ end;
+ {[{id,"ifndef"}],LCs1} ->
+ %% #ifndef <id>
+ %% text
+ %% [#else
+ %% text]
+ %% #endif
+ case scan_all(LCs1,true,true) of
+ {[{id,Var} | _],_} ->
+ {S,{{'#ifndef', 1-defined(S,Var)},NL+1,Cs}};
+ {_,_} ->
+ error(S, "no macro name given in #ifdef directive",[]),
+ {S,{{'#ifndef', 0},NL+1,Cs}}
+ end;
+ {[{id,"include"}],LCs1} ->
+ case scan_all(LCs1,true,false) of
+ {[{string,File}],_} ->
+ cpp_include(S,File,quoted,NL,Cs);
+ {[{search,File}],_} ->
+ cpp_include(S,File,search,NL,Cs);
+ {_,_} ->
+ error(S, "bad arguments given in #include directive",[]),
+ {S,{[],NL+1,Cs}}
+ end;
+
+ {[{id,"include_next"}],LCs1} ->
+ error(S, "include_next NOT implemented", []),
+ {S,{[],NL+1,Cs}};
+
+ {[{id,"line"}],LCs1} ->
+ case scan_all(LCs1,true,true) of
+ %% #line <num>
+ %% | #line <num> <string>
+ {[{num,Ln}],_} ->
+ case catch list_to_integer(Ln) of
+ {'EXIT',_} ->
+ error(S, "bad line number ~p in #line directive",
+ [Ln]),
+ {S,{[],NL+1,Cs}};
+ N ->
+ S1 = set_line(S, N),
+ {S1,{[],NL+1,Cs}}
+ end;
+ {[{num,Ln},{string,File}],_} ->
+ case catch list_to_integer(Ln) of
+ {'EXIT',_} ->
+ error(S, "bad linenumber ~p in #line directive",
+ [Ln]),
+ {S,{[],NL+1,Cs}};
+ N ->
+ S1 = set_line(S, N, File),
+ {S1,{[],NL+1,Cs}}
+ end;
+ {_, _} ->
+ error(S, "bad #line directive", []),
+ {S,{[],NL+1,Cs}}
+ end;
+ {[{id,"error"}],LCs1} ->
+ error_ts(S, scan_all(LCs1,true,true)),
+ {S,{[],NL+1,Cs}};
+
+ {[{id,"warning"}],LCs1} ->
+ warning_ts(S, scan_all(LCs1,true,true)),
+ {S,{[],NL+1,Cs}};
+
+ {_,_} ->
+ error(S, "preprocessor directive not understood",[]),
+ {S,{[],NL+1,Cs}}
+ end.
+
+%%
+%% #if <expr>
+%% [text]
+%% [#elif <expr>
+%% text]
+%% [#elif <expr>
+%% text]
+%% [#else
+%% text]
+%% #endif
+%%
+
+cpp_if(S,LCs,Tag, NL, Cs) ->
+ case bic_scan:string(LCs) of
+ {ok,Ts,_} ->
+ %% Put the scanned tokens in a parsable form:
+ %% int _ = Ts;
+ Ln = S#s.line,
+ case bic_parse:parse([{int,Ln},
+ {identifier,Ln,"_"},
+ {'=',Ln}|Ts]++
+ [{';',Ln}]) of
+ {ok,[#'DECL' { value=Expr }]} ->
+ {S,{{Tag,eval(S,Expr)},NL+1,Cs}};
+ {error,{_Ln,Mod,Message}} ->
+ Err = Mod:format_error(Message),
+ error(S, "~s", [Err]),
+ {S,{{Tag, 0},NL+1,Cs}}
+ end;
+ {error,{_Ln,Mod,Message}} ->
+ Err = Mod:format_error(Message),
+ error(S, "~s", [Err]),
+ {S,{{Tag, 0},NL+1,Cs}}
+ end.
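+
+%% A worked example of the wrapping trick above: the directive
+%%   #if FOO + 1 > 2
+%% is scanned into tokens for `FOO + 1 > 2`, wrapped so that it parses
+%% as the declaration
+%%   int _ = FOO + 1 > 2;
+%% and the value field of the resulting #'DECL'{} is the expression
+%% tree handed to eval/2.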
+
+%% #include "file"
+%% | #include <file>
+%%
+cpp_include(S,File,quoted,NL,Cs) ->
+ Path = [S#s.pwd]++S#s.qinclude ++ S#s.include,
+ cpp_include_path(S, File, Path, NL, Cs);
+cpp_include(S,File,search,NL,Cs) ->
+ Path = S#s.include,
+ cpp_include_path(S, File, Path, NL, Cs).
+
+cpp_include_path(S, Name, [Path | Ps], NL, Cs) ->
+ File = filename:join(Path, Name),
+ ?dbg("OPEN: ~s\n", [File]),
+ case file:open(File,[read]) of
+ {ok, Fd} ->
+ ?dbg("Open file: ~s\n", [File]),
+ S1 = save(S, Cs),
+ S2 = S1#s { fd = Fd,
+ file = File,
+ line = 1,
+ if_stack = [],
+ pwd = filename:dirname(File)
+ },
+ {S2,{auto_line(1,File),0,[]}};
+ {error,_} ->
+ cpp_include_path(S,Name,Ps,NL,Cs)
+ end;
+cpp_include_path(S,File,[],NL,Cs) ->
+ error(S, "file ~s not found", [File]),
+ {S, {[],NL+1, Cs}}.
+
+
+%%
+%% FIXME: store buffer positions and reopen, to save
+%% open file descriptors (when regular files!)
+%%
+save(S, Buf) ->
+ Elem =
+ #save { fd = S#s.fd, file = S#s.file, line = S#s.line,
+ if_stack = S#s.if_stack,
+ pwd = S#s.pwd, buf = Buf,
+ defs = [{"__FILE__", value(S, "__FILE__")},
+ {"__LINE__", value(S, "__LINE__")},
+ {"__INCLUDE_LEVEL__", value(S,"__INCLUDE_LEVEL__")}
+ ]
+ },
+ ?dbg("Save file ~s:~w\n", [S#s.file,S#s.line]),
+ Defs = dict_set(S#s.defs,"__INCLUDE_LEVEL__",
+ value(S,"__INCLUDE_LEVEL__")+1),
+ S#s { defs = Defs, inc_stack = [Elem | S#s.inc_stack]}.
+
+restore(S) ->
+ case S#s.inc_stack of
+ [] ->
+ {S,empty};
+ [#save { fd=Fd, file=File, line=Line,
+ if_stack=Stack, pwd=Pwd,
+ buf=Buf, defs=DefsList }|Is] ->
+ ?dbg("Restore file ~s:~w\n", [File,Line]),
+ Defs = foldl(fun({Name,Value},D) ->
+ dict_set(D, Name, Value)
+ end, S#s.defs, DefsList),
+ S1 = S#s { file=File,
+ fd = Fd,
+ line = Line,
+ if_stack = Stack,
+ inc_stack = Is,
+ pwd = Pwd,
+ defs = Defs
+ },
+ {S1, Buf}
+ end.
+
+auto_line(N, File) ->
+ lists:flatten(io_lib:format("# ~w \"~s\"\n",[N, File])).
+
+%% push directive status line on IF-STACK
+push_if(S,D,V,Ln) ->
+ Stack = S#s.if_stack,
+ S#s { if_stack = [{D,V,Ln} | Stack]}.
+
+%% pop IF-STACK
+pop_if(S) ->
+ case S#s.if_stack of
+ [] ->
+ {S,false};
+ [_|Stack] ->
+ {S#s { if_stack = Stack }, true}
+ end.
+
+%% peek top element on IF-STACK
+peek_if(S) ->
+ case S#s.if_stack of
+ [] ->
+ empty;
+ [E|_] ->
+ E
+ end.
+
+%% modify the top element
+mod_if(S,D,V,Ln) ->
+ [_ | Stack] = S#s.if_stack,
+ S#s { if_stack = [{D,V,Ln} | Stack]}.
+
+assert(S,Pred,Answer) ->
+ P = dict:store({Pred,Answer},true,S#s.preds),
+ S#s { preds = P }.
+
+unassert(S,Pred,Answer) ->
+ P = dict:erase({Pred,Answer},S#s.preds),
+ S#s { preds = P }.
+
+unassert(S,Pred) ->
+ P =
+ foldl(
+ fun({P,A},P0) when P == Pred ->
+ dict:erase({P,A},P0);
+ (_, P0) ->
+ P0
+ end,S#s.preds, dict:fetch_keys(S#s.preds)),
+ S#s { preds = P }.
+
+
+define_macro(S, Name, Cs) ->
+ {Ts, Def} = scan_until_char(Cs,false,true,")"),
+ case parse_macro_args(Ts) of
+ {ok,Ps} ->
+ define(S,Name,{Ps,Def}); %% store the parameter list and the macro body
+ {error,Msg} ->
+ error(S, "~s in macro argument list", [Msg]),
+ S
+ end.
+
+parse_macro_args([{char,"("} | Ts]) ->
+ parse_macro_args(Ts,[]);
+parse_macro_args(_) ->
+ {error, "missing ("}.
+
+parse_macro_args([{id,Name},{char,","}|Ts], Ps) ->
+ parse_macro_args(Ts, [Name | Ps]);
+parse_macro_args([{id,Name},{char,")"}], Ps) ->
+ check_macro_args(reverse([Name|Ps]));
+parse_macro_args([{char,"."},{char,"."},{char,"."},{char,")"}],Ps) ->
+ check_macro_args(reverse([".__VA_ARGS__"|Ps]));
+parse_macro_args([{id,Name},{char,"."},{char,"."},{char,"."},{char,")"}],Ps) ->
+ check_macro_args(reverse(["."++Name|Ps]));
+parse_macro_args([{char,")"}], Ps) ->
+ check_macro_args(reverse(Ps));
+parse_macro_args(_, _Ps) ->
+ {error, "syntax error"}.
+
+check_macro_args(Ns) ->
+ N1 = length(Ns),
+ N2 = length(lists:usort(Ns)),
+ if N1 == N2 ->
+ {ok, Ns};
+ true ->
+ {error, "argument multiply defined"}
+ end.
+
+
+%% expand a line
+%% x = 1
+%% xx = 2
+%%
+%% x/* */x => 1 1
+%%
+expand(S,[]) ->
+ [];
+expand(S,Cs) ->
+ case scan_one(Cs,false,true) of
+ {[{id,Name}], Cs1} ->
+ case dict:find(Name,S#s.defs) of
+ error ->
+ Name ++ expand(S,Cs1);
+ {ok,{Ps,Cs2}} ->
+ io:format("FIXME macro expand\n"),
+ Cs3 = Cs2, %% parse arguments
+ expand(S,Cs3++Cs1);
+ {ok,N} when integer(N) -> %% for internals like __LINE__
+ integer_to_list(N) ++ expand(S,Cs1);
+ {ok,Cs2} ->
+ Cs2++expand(S,Cs1)
+ end;
+ {[{char,[C]}],Cs1} ->
+ [C | expand(S,Cs1)];
+ {[{num,Num}],Cs1} ->
+ Num ++ expand(S,Cs1);
+ {[{string,Str}],Cs1} ->
+ [$\"|Str] ++ [$\" | expand(S,Cs1)];
+ {[{search,Str}],Cs1} ->
+ [$\<|Str] ++ [$\> | expand(S,Cs1)]
+ end.
+
+%% Evaluate a #if expression
+%% return 0 or 1
+eval(S,#'EXPR' { op=Op, arg1=Arg1, arg2=Arg2}) ->
+ case Op of
+ '+' -> eval(S,Arg1)+eval(S,Arg2);
+ '-' -> eval(S,Arg1)-eval(S,Arg2);
+ '*' -> eval(S,Arg1)*eval(S,Arg2);
+ '/' ->
+ A1=eval(S,Arg1),A2=eval(S,Arg2),
+ if is_integer(A1), is_integer(A2) ->
+ A1 div A2;
+ true ->
+ A1 / A2
+ end;
+ '%' -> eval(S,Arg1) rem eval(S,Arg2);
+ '<<' -> eval(S,Arg1) bsl eval(S,Arg2);
+ '>>' -> eval(S,Arg1) bsr eval(S,Arg2);
+ '&' -> eval(S,Arg1) band eval(S,Arg2);
+ '|' -> eval(S,Arg1) bor eval(S,Arg2);
+ '^' -> eval(S,Arg1) bxor eval(S,Arg2);
+ '&&' -> case eval(S,Arg1) of
+ 0 -> 0;
+ _ -> eval(S,Arg2)
+ end;
+ '||' -> case eval(S,Arg1) of
+ 0 -> eval(S,Arg2);
+ V -> V
+ end;
+ '>' -> case eval(S,Arg1) > eval(S,Arg2) of
+ true -> 1;
+ false -> 0
+ end;
+ '>=' -> case eval(S,Arg1) >= eval(S,Arg2) of
+ true -> 1;
+ false -> 0
+ end;
+ '<' -> case eval(S,Arg1) < eval(S,Arg2) of
+ true -> 1;
+ false -> 0
+ end;
+ '<=' -> case eval(S,Arg1) =< eval(S,Arg2) of
+ true -> 1;
+ false -> 0
+ end;
+ '==' -> case eval(S,Arg1) == eval(S,Arg2) of
+ true -> 1;
+ false -> 0
+ end;
+ '!=' -> case eval(S,Arg1) /= eval(S,Arg2) of
+ true -> 1;
+ false -> 0
+ end;
+ _ ->
+ 0
+ end;
+eval(S,#'UNARY' { op=Op, arg=Arg }) ->
+ case Op of
+ '~' -> bnot eval(S,Arg);
+ '+' -> eval(S,Arg);
+ '-' -> - eval(S,Arg);
+ '!' -> case eval(S,Arg) of
+ 0 -> 1;
+ _ -> 0
+ end;
+ _ -> 0
+ end;
+eval(S,#'CALL' { func=Func, args=Args }) ->
+ case Func of
+ {identifier,_,"defined"} ->
+ case Args of
+ [{identifier,_,ID}] ->
+ defined(S,ID);
+ _ ->
+ 0
+ end;
+ _ -> 0
+ end;
+eval(_S,#'CONSTANT' { base=Base, value=Value }) ->
+ if is_integer(Base) ->
+ erlang:list_to_integer(Value,Base);
+ Base==char -> hd(Value);
+ Base==float -> list_to_float(Value)
+ end;
+eval(S,#'ID' { name=ID }) -> value(S,ID).
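+
+%% A sketch of an evaluation: for `#if defined(FOO) && FOO > 1` with
+%% FOO defined as "2", the '&&' arm first evaluates the #'CALL'{} to
+%% defined (giving 1), then the '>' #'EXPR'{}: value(S,"FOO") is 2 and
+%% 2 > 1 gives 1, so the whole #if tests true.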
+
+%%
+%% scan {id,ID} ID = {L}({L}|{D})*
+%% {num,Decimal}
+%% {string,String} '"' C* '"'
+%% {search,String} '<' C* '>'
+%% {char,[C]}
+
+scan_all(Chars,SkipWs,SkipSearch) ->
+ scan_until(Chars,SkipWs,SkipSearch,fun(_) -> false end).
+
+scan_one(Chars,SkipWs,SkipSearch) ->
+ scan_until(Chars,SkipWs,SkipSearch,fun(_) -> true end).
+
+scan_until_char(Chars,SkipWs,SkipSearch,Char) ->
+ scan_until_token(Chars,SkipWs,SkipSearch,{char,Char}).
+
+scan_until_token(Chars,SkipWs,SkipSearch,Token) ->
+ scan_until(Chars,SkipWs,SkipSearch,fun(Tok) -> Tok==Token end).
+
+scan_until(Chars,SkipWs,SkipSearch,Until) ->
+ scan_until(Chars,SkipWs,SkipSearch,[],Until).
+
+scan_until([$\"|Cs],SkipWs,SkipSearch,Ts,Until) ->
+ scan_quoted(Cs, $\", string, SkipWs, SkipSearch, Ts, [], Until);
+scan_until([$\<|Cs],SkipWs,false,Ts, Until) ->
+ %% we only scan for <abc.h> once
+ scan_quoted(Cs, $\>, search, SkipWs, true, Ts, [], Until);
+
+scan_until([C|Cs], SkipWs, SkipSearch, Ts, Until) ->
+ if C >= $0, C =< $9 ->
+ scan_num(Cs, SkipWs,SkipSearch,Ts,[C],Until);
+ C >= $a, C =< $z; C >= $A, C =< $Z; C == $_ ->
+ scan_id(Cs, SkipWs,SkipSearch,Ts, [C], Until);
+ true ->
+ scan_t(Cs, SkipWs,SkipSearch,{char,[C]}, Ts, Until)
+ end;
+scan_until([], _SkipWs, _SkipSearch, Ts, _Until) ->
+ {reverse(Ts), ""}.
+
+
+scan_t(Cs,true,SkipSearch,{char,[C]},Ts,Until)
+ when C >= 0, C =< $\s ->
+ scan_until(Cs,true,SkipSearch,Ts,Until);
+scan_t(Cs,SkipWs,SkipSearch,T,Ts,Until) ->
+ case Until(T) of
+ true ->
+ {reverse([T|Ts]), Cs};
+ false ->
+ scan_until(Cs,SkipWs,SkipSearch,[T|Ts],Until)
+ end.
+
+%% scan quoted stuff like 'abc' "abc" <abc>
+scan_quoted([Q|Cs], Q, Tag, SkipWs,SkipSearch, Ts, Acc, Until) ->
+ scan_t(Cs, SkipWs, SkipSearch, {Tag,reverse(Acc)}, Ts, Until);
+scan_quoted([$\\,C|Cs], Q, Tag, SkipWs,SkipSearch,Ts,Acc,Until) ->
+ scan_quoted(Cs, Q, Tag, SkipWs,SkipSearch,Ts,[$\\,C|Acc],Until);
+scan_quoted([C|Cs], Q, Tag, SkipWs,SkipSearch,Ts,Acc,Until) ->
+ scan_quoted(Cs, Q, Tag, SkipWs,SkipSearch,Ts,[C|Acc],Until);
+scan_quoted([], Q, Tag, SkipWs,SkipSearch,Ts,Acc,Until) ->
+ %% FIXME: abort scan...
+ scan_t([], SkipWs, SkipSearch, {Tag,reverse(Acc)},Ts,Until).
+
+scan_num([C|Cs], SkipWs,SkipSearch,Ts,Acc,Until)
+ when C >= $0, C =< $9 ->
+ scan_num(Cs,SkipWs,SkipSearch,Ts,[C|Acc],Until);
+scan_num(Cs, SkipWs,SkipSearch,Ts,Acc,Until) ->
+ scan_t(Cs,SkipWs,SkipSearch,{num,reverse(Acc)},Ts,Until).
+
+scan_id([C|Cs],SkipWs,SkipSearch,Ts,Acc,Until)
+ when C >= $a, C =< $z; C >= $A, C =< $Z; C >= $0, C =< $9; C == $_ ->
+ scan_id(Cs,SkipWs,SkipSearch,Ts,[C|Acc],Until);
+scan_id(Cs, SkipWs,SkipSearch,Ts,Acc,Until) ->
+ scan_t(Cs,SkipWs,SkipSearch,{id,reverse(Acc)},Ts,Until).
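+
+%% For example (a sketch of the expected result):
+%%   scan_all("define MAX 100", true, true)
+%% gives {[{id,"define"},{id,"MAX"},{num,"100"}], ""}.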
+
+
+%% look up Var, rewriting it as an integer value if possible
+value(S,Var) ->
+ case dict:find(Var,S#s.defs) of
+ error -> 0;
+ {ok,Val} when integer(Val) -> Val;
+ {ok,Val} when list(Val) ->
+ case catch list_to_integer(Val) of
+ {'EXIT',_} -> Val;
+ N -> N
+ end
+ end.
+
+defined(S,Var) ->
+ case dict:find(Var,S#s.defs) of
+ error -> 0;
+ _ -> 1
+ end.
+
+%% macro operations
+undef(S, Var) ->
+ if Var == "__INCLUDE_LEVEL__" ->
+ warning(S, "can not undefine ~s", [Var]),
+ S;
+ Var == "__LINE__";
+ Var == "__FILE__" ->
+ warning(S, "undefining ~s", [Var]),
+ D = dict:erase(Var, S#s.defs),
+ S#s { defs = D };
+ true ->
+ D = dict:erase(Var, S#s.defs),
+ S#s { defs = D }
+ end.
+
+define(S,Var,Value) ->
+ if Var == "__INCLUDE_LEVEL__" ->
+ warning(S, "can not redefine ~s", [Var]),
+ S;
+ true ->
+ case defined(S,Var) of
+ 1 -> warning(S, "~s redefined", [Var]),
+ set(S,Var,Value);
+ 0 ->
+ set(S,Var,Value)
+ end
+ end.
+
+%% set only if defined
+dict_update(D,Var,Value) ->
+ case dict:find(Var, D) of
+ error -> D;
+ _ -> dict:store(Var,Value,D)
+ end.
+
+dict_set(D,Var,Value) ->
+ dict:store(Var,Value,D).
+
+set(S,Var,Value) ->
+ D = dict_set(S#s.defs,Var,Value),
+ S#s { defs = D }.
+
+update_line(S, N) ->
+ N1 = S#s.line + N,
+ DN1 = value(S, "__LINE__") + N,
+ Defs = dict_update(S#s.defs, "__LINE__", DN1),
+ S#s { line = N1, defs = Defs }.
+
+set_line(S, Line) ->
+ D0 = dict_update(S#s.defs, "__LINE__", Line),
+ S#s { defs = D0 }.
+
+set_line(S, Line, File) ->
+ D0 = dict_update(S#s.defs, "__LINE__", Line),
+ D1 = dict_update(D0, "__FILE__", File),
+ S#s { defs = D1 }.
+
+%% warnings and errors use __FILE__ / __LINE__ instead of
+%% S#s.line and S#s.file. This is because the #line directive should
+%% make sure that errors refer to the correct file and line.
+warning(S, Fmt, As) ->
+ io:format("~s:~w: warning:"++Fmt++"\n",
+ [value(S,"__FILE__"), value(S,"__LINE__") | As]).
+
+error(S, Fmt, As) ->
+ io:format("~s:~w: error:"++Fmt++"\n",
+ [value(S,"__FILE__"), value(S,"__LINE__") | As]).
+
+warning_ts(S, Ts) ->
+ Chars = map(fun({_,Cs}) -> Cs end, Ts),
+ io:format("~s:~w: warning ~s\n",
+ [value(S,"__FILE__"), value(S,"__LINE__"),Chars]).
+
+error_ts(S, Ts) ->
+ Chars = map(fun({_,Cs}) -> Cs end, Ts),
+ io:format("~s:~w: error ~s\n",
+ [value(S,"__FILE__"), value(S,"__LINE__"),Chars]).
580 lib/bic/src/bic_leex.erl
@@ -0,0 +1,580 @@
+%% Copyright (C) 1996-99 Ericsson Telecom AB
+%% File : leex.erl
+%% Author : Robert Virding (rv@cslab.ericsson.se)
+%% Purpose : A Lexical Analyser Generator for Erlang.
+
+%% THIS IS A PRE-RELEASE OF LEEX - RELEASED ONLY BECAUSE MANY PEOPLE
+%% WANTED IT - THE OFFICIAL RELEASE WILL PROVIDE A DIFFERENT INCOMPATIBLE
+%% AND BETTER INTERFACE - BE WARNED
+%% PLEASE REPORT ALL BUGS TO THE AUTHOR.
+
+%% Most of the algorithms used here are taken pretty much as
+%% described in the "Dragon Book" by Aho, Sethi and Ullman. Some
+%% complementing details were taken from "Compiler Design in C" by
+%% Holub.
+
+-module(bic_leex).
+
+-copyright('Copyright (c) 1996-99 Ericsson Telecom AB').
+
+-author('rv@cslab.ericsson.se').
+
+-export([gen/1,gen/2,gen/3,format_error/1]).
+
+-import(lists, [member/2,reverse/1,seq/2,keymember/3,keysearch/3,keysort/2,
+ foreach/2]).
+-import(ordsets, [is_element/2,add_element/2,union/2,subtract/2]).
+
+%%-compile([export_all]).
+
+-record(nfa_state, {no,edges=[],accept=noaccept}).
+-record(dfa_state, {no,nfa=[],trans=[],accept=noaccept}).
+
+gen([In]) ->
+ gen([In,In]);
+gen([In,Out]) ->
+ gen(In, Out).
+
+gen(In, Out) ->
+ gen(In, Out, []).
+
+gen(In, Out, Options) ->
+ InFile = filename:rootname(In) ++ ".xrl",
+ OutFile = filename:rootname(Out) ++ ".erl",
+ ModName = filename:rootname(filename:basename(Out)),
+ case parse_file(InFile) of
+ {ok,REAs,Actions,Code} ->
+ {NFA,NF} = build_combined_nfa(REAs),
+ io:fwrite("NFA contains ~w states, ", [size(NFA)]),
+ {DFA0,DF0} = build_dfa(NFA, NF),
+ io:fwrite("DFA contains ~w states, ", [length(DFA0)]),
+ {DFA,DF} = minimise_dfa(DFA0, DF0),
+ io:fwrite("minimised to ~w states.~n", [length(DFA)]),
+ out_file(OutFile, ModName, DFA, DF, Actions, Code);
+ {error,Error} ->
+ io:put_chars([$\n,gcc_error(InFile, Error),$\n]),
+ error
+ end.
+
+
+format_error({open,F}) -> ["error opening ",io_lib:write_string(F)];
+format_error(missing_rules) -> "missing rules";
+format_error(bad_rule) -> "bad rule";
+format_error({regexp,E}) -> ["bad regexp `",regexp:format_error(E),"'"];
+format_error({after_regexp,S}) ->
+ ["bad code after regexp ",io_lib:write_string(S)].
+
+gcc_error(File, {Line,Mod,Error}) ->
+ io_lib:format("~s:~w: ~s", [File,Line,apply(Mod, format_error, [Error])]);
+gcc_error(File, {Mod,Error}) ->
+ io_lib:format("~s: ~s", [File,apply(Mod, format_error, [Error])]).
+
+%% parse_file(InFile) -> {[REA],[Action],Code} | {error,Error}
+%% when
+%% REA = {RegExp,ActionNo};
+%% Action = {ActionNo,ActionString};
+%% Code = [char()].
+%%
+%% Read and parse the file InFile.
+%% After each section of the file has been parsed we directly call the
+%% next section. This is done when we detect a line we don't recognise
+%% in the current section. The file format is very simple and Erlang
+%% token based; we allow empty lines and Erlang-style comments.
+
+parse_file(InFile) ->
+ case file:open(InFile, read) of
+ {ok,Ifile} ->
+ io:fwrite("Parsing file ~s, ", [InFile]),
+ case parse_head(Ifile) of
+ {ok,REAs,Actions,Code} ->
+ io:fwrite("contained ~w rules.~n", [length(REAs)]),
+ file:close(Ifile),
+ {ok,REAs,Actions,Code};
+ Error ->
+ file:close(Ifile),
+ Error
+ end;
+ {error,R} ->
+ {error,{leex,{open,InFile}}}
+ end.
+
+%% parse_head(File)
+%% Parse the head of the file.
+
+parse_head(Ifile) ->
+ parse_defs(Ifile, nextline(Ifile, 0)).
+
+%% parse_defs(File, Line)
+%% Parse the macro definition section of a file. Allow no definitions.
+
+parse_defs(Ifile, {ok,[$D,$e,$f,$i,$n,$i,$t,$i,$o,$n,$s,$.|_Rest],L}) ->
+ parse_defs(Ifile, nextline(Ifile, L), []);
+parse_defs(Ifile, Line) ->
+ parse_rules(Ifile, Line, []).
+
+parse_defs(Ifile, {ok,Chars,L}, Ms) ->
+ case string:tokens(Chars, " \t\n") of
+ [Name,"=",Def] ->
+ parse_defs(Ifile, nextline(Ifile, L), [{Name,Def}|Ms]);
+ Other ->
+ parse_rules(Ifile, {ok,Chars,L}, Ms)
+ end;
+parse_defs(Ifile, Line, Ms) ->
+ parse_rules(Ifile, Line, Ms).
+
+%% parse_rules(File, Line, Macros)
+%% Parse the RE rules section of the file. This must exist.
+
+parse_rules(Ifile, {ok,[$R,$u,$l,$e,$s,$.|_Rest],L}, Ms) ->
+ parse_rules(Ifile, nextline(Ifile, L), Ms, [], [], 0);
+parse_rules(Ifile, {ok,Other,L}, Ms) ->
+ {error,{L,leex,missing_rules}};
+parse_rules(Ifile, {eof,L}, Ms) ->
+ {error,{L,leex,missing_rules}}.
+
+collect_rule(Ifile, Chars, L0) ->
+ {match,St,Len} = regexp:first_match(Chars, "[^ \t]+"),
+ %%io:fwrite("RE = ~p~n", [string:substr(Chars, St, Len)]),
+ case collect_rule(Ifile, string:substr(Chars, St+Len), L0, []) of
+ {ok,[{':',Lc}|Toks],L1} -> {ok,string:substr(Chars, St, Len),Toks,L1};
+ {ok,Toks,L1} -> {error,{L0,leex,bad_rule}};
+ {eof,L1} -> {error,{L1,leex,bad_rule}};
+ {error,E,L1} -> {error,E}
+ end.
+
+collect_rule(Ifile, Chars, L0, Cont0) ->
+ case erl_scan:tokens(Cont0, Chars, L0) of
+ {done,{ok,Toks,L1},Rest} -> {ok,Toks,L0};
+ {done,{eof,L1},Rest} -> {eof,L0};
+ {done,{error,E,L1},Rest} -> {error,E,L0};
+ {more,Cont1} ->
+ collect_rule(Ifile, io:get_line(Ifile, leex), L0+1, Cont1)
+ end.
+
+parse_rules(Ifile, {ok,[$E,$r,$l,$a,$n,$g,$ ,$c,$o,$d,$e,$.|_Rest],L},
+ Ms, REAs, As, N) ->
+ %% Must be careful to put rules in correct order!
+ parse_code(Ifile, L, reverse(REAs), reverse(As));
+parse_rules(Ifile, {ok,Chars,L0}, Ms, REAs, As, N) ->
+ %%io:fwrite("~w: ~p~n", [L0,Chars]),
+ case collect_rule(Ifile, Chars, L0) of
+ {ok,Re,Atoks,L1} ->
+ case parse_rule(Re, L0, Atoks, Ms, N) of
+ {ok,REA,A} ->
+ parse_rules(Ifile, nextline(Ifile, L1), Ms,
+ [REA|REAs], [A|As], N+1);
+ {error,E} -> {error,E}
+ end;
+ {error,E} -> {error,E}
+ end;
+parse_rules(Ifile, {eof,Line}, Ms, REAs, As, N) ->
+ %% Must be careful to put rules in correct order!
+ {ok,reverse(REAs),reverse(As),[]}.
+
+%% parse_rule(RegExpString, RegExpLine, ActionTokens, Macros, Counter)
+%% Parse one regexp after performing macro substition.
+
+parse_rule(S, Line, [{dot,Ld}], Ms, N) ->
+ case parse_rule_regexp(S, Ms) of
+ {ok,R} ->
+ {ok,{R,N},{N,empty_action}};
+ {error,E} ->
+ {error,{Line,leex,{regexp,E}}}
+ end;
+parse_rule(S, Line, Atoks, Ms, N) ->
+ case parse_rule_regexp(S, Ms) of
+ {ok,R} ->
+ case erl_parse:parse_exprs(Atoks) of
+ {ok,Aes} ->
+ YYtext = keymember('YYtext', 3, Atoks),
+ {ok,{R,N},{N,Aes,YYtext}};
+ {error,E} ->
+ {error,{Line,leex,{after_regexp,S}}}
+ end;
+ {error,E} ->
+ {error,{Line,leex,{regexp,E}}}
+ end.
+
+parse_rule_regexp(RE0, [{M,Exp}|Ms]) ->
+ case regexp:gsub(RE0, "{" ++ M ++ "}", Exp) of
+ {ok,RE,N} -> parse_rule_regexp(RE, Ms);
+ {error,E} -> parse_rule_regexp(RE0, Ms)
+ end;
+parse_rule_regexp(RE, []) ->
+ %%io:fwrite("RE = ~p~n", [RE]),
+ regexp:parse(RE).
+
+%% parse_code(File, Line, REAs, Actions)
+%% Parse the code section of the file.
+
+parse_code(Ifile, Line, REAs, As) ->
+ {ok,REAs,As,io:get_chars(Ifile, leex, 102400)}.
+
+%% nextline(InputFile, PrevLineNo) -> {ok,Chars,LineNo} | {eof,LineNo}.
+%% Get the next line skipping comment lines and blank lines.
+
+nextline(Ifile, L) ->
+ case io:get_line(Ifile, leex) of
+ eof -> {eof,L};
+ Chars ->
+ case skip(Chars, " \t\n") of
+ [$%|_Rest] -> nextline(Ifile, L+1);
+ [] -> nextline(Ifile, L+1);
+ Other -> {ok,Chars,L+1}
+ end
+ end.
+
+%% skip(Str, Cs) -> lists:dropwhile(fun (C) -> member(C, Cs) end, Str).
+
+skip([C|Str], Cs) ->
+ case member(C, Cs) of
+ true -> skip(Str, Cs);
+ false -> [C|Str]
+ end;
+skip([], Cs) -> [].
+
+%% build_combined_nfa(RegExpActionList) -> {NFA,FirstState}. Build
+%% the combined NFA using Thompson's construction straight out of the
+%% book. Build the separate NFAs in the same order as the rules so
+%% that the accepting have ascending states have ascending state
+%% numbers. Start numbering the states from 1 as we put the states
+%% in a tuple with the state number as the index.
+
+build_combined_nfa(REAs) ->
+ {NFA0,Firsts,Free} = build_nfa_list(REAs, [], [], 1),
+ F = #nfa_state{no=Free,edges=epsilon_trans(Firsts)},
+ {list_to_tuple(keysort(#nfa_state.no, [F|NFA0])),Free}.
+
+build_nfa_list([{RE,Action}|REAs], NFA0, Firsts, Free0) ->
+ {NFA1,Free1,First} = build_nfa(RE, Free0, Action),
+ build_nfa_list(REAs, NFA1 ++ NFA0, [First|Firsts], Free1);
+build_nfa_list([], NFA, Firsts, Free) ->
+ {NFA,reverse(Firsts),Free}.
+
+epsilon_trans(Firsts) -> [ {epsilon,F} || F <- Firsts ].
+
+%% {NFA,NextFreeState,FirstState} = build_nfa(RegExp, FreeState, Action)
+%% When building the NFA states for a regexp we don't build the end
+%% state, just allocate a State for it and return this state
+%% number. This allows us to avoid building unnecessary states for
+%% concatenation which would then have to be removed by overwriting
+%% an existing state.
+
+build_nfa(RE, FreeState, Action) ->
+ {NFA,N,Es} = build_nfa(RE, FreeState+1, FreeState, []),
+ {[#nfa_state{no=Es,accept={accept,Action}}|NFA],N,FreeState}.
+
+%% build_nfa(RegExp, NextState, FirstState, NFA) -> {NFA,NextState,EndState}.
+%% The NFA is a list of nfa_state in no predefined order. The state
+%% number of the returned EndState is already allocated!
+
+build_nfa({'or',RE1,RE2}, N0, Fs, NFA0) ->
+ {NFA1,N1,Es1} = build_nfa(RE1, N0+1, N0, NFA0),
+ {NFA2,N2,Es2} = build_nfa(RE2, N1+1, N1, NFA1),
+ Es = N2,
+ {[#nfa_state{no=Fs,edges=[{epsilon,N0},{epsilon,N1}]},
+ #nfa_state{no=Es1,edges=[{epsilon,Es}]},
+ #nfa_state{no=Es2,edges=[{epsilon,Es}]}|NFA2],
+ N2+1,Es};
+build_nfa({concat,RE1, RE2}, N0, Fs, NFA0) ->
+ {NFA1,N1,Es1} = build_nfa(RE1, N0, Fs, NFA0),
+ {NFA2,N2,Es2} = build_nfa(RE2, N1, Es1, NFA1),
+ {NFA2,N2,Es2};
+build_nfa({kclosure,RE}, N0, Fs, NFA0) ->
+ {NFA1,N1,Es1} = build_nfa(RE, N0+1, N0, NFA0),
+ Es = N1,
+ {[#nfa_state{no=Fs,edges=[{epsilon,N0},{epsilon,Es}]},
+ #nfa_state{no=Es1,edges=[{epsilon,N0},{epsilon,Es}]}|NFA1],
+ N1+1,Es};
+build_nfa({pclosure,RE}, N0, Fs, NFA0) ->
+ {NFA1,N1,Es1} = build_nfa(RE, N0+1, N0, NFA0),
+ Es = N1,
+ {[#nfa_state{no=Fs,edges=[{epsilon,N0}]},
+ #nfa_state{no=Es1,edges=[{epsilon,N0},{epsilon,Es}]}|NFA1],
+ N1+1,Es};
+build_nfa({optional,RE}, N0, Fs, NFA0) ->
+ {NFA1,N1,Es1} = build_nfa(RE, N0+1, N0, NFA0),
+ Es = N1,
+ {[#nfa_state{no=Fs,edges=[{epsilon,N0},{epsilon,Es}]},
+ #nfa_state{no=Es1,edges=[{epsilon,Es}]}|NFA1],
+ N1+1,Es};
+build_nfa({char_class,Cc}, N, Fs, NFA) ->
+ {[#nfa_state{no=Fs,edges=[{char_class(Cc),N}]}|NFA],N+1,N};
+build_nfa({comp_class,Cc}, N, Fs, NFA) ->
+ {[#nfa_state{no=Fs,edges=[{comp_class(Cc),N}]}|NFA],N+1,N};
+build_nfa(C, N, Fs, NFA) when integer(C) ->
+ {[#nfa_state{no=Fs,edges=[{[C],N}]}|NFA],N+1,N}.
+
+char_class(Cc) ->
+ lists:foldl(fun ({C1,C2}, Set) -> union(seq(C1, C2), Set);
+ (C, Set) -> add_element(C, Set) end, [], Cc).
+
+comp_class(Cc) -> subtract(seq(0, 255), char_class(Cc)).
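+
+%% For example, char_class([{$a,$c},$x]) gives the ordset "abcx", and
+%% comp_class of the same class gives every other byte in 0-255.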
+
+%% build_dfa(NFA, NfaFirstState) -> {DFA,DfaFirstState}.
+%% Build a DFA from an NFA using "subset construction". The major
+%% difference from the book is that we keep the marked and unmarked
+%% DFA states in separate lists. New DFA states are added to the
+%% unmarked list and states are marked by moving them to the marked
+%% list. We assume that the NFA accepting state numbers are in
+%% ascending order for the rules and use ordsets to keep this order.
+
+build_dfa(NFA, Nf) ->
+ D = #dfa_state{no=0,nfa=eclosure([Nf], NFA)},
+ {build_dfa([D], 1, [], NFA),0}.
+
+%% build_dfa([UnMarked], NextState, [Marked], NFA) -> DFA.
+%% Traverse the unmarked states. Temporarily add the current unmarked
+%% state to the marked list before calculating transitions; this is
+%% to avoid adding too many duplicate states. Add it properly to the
+%% marked list afterwards with the correct transitions.
+
+build_dfa([U|Us0], N0, Ms, NFA) ->
+ {Ts,Us1,N1} = build_dfa(255, U#dfa_state.nfa, Us0, N0, [], [U|Ms], NFA),
+ M = U#dfa_state{trans=Ts,accept=accept(U#dfa_state.nfa, NFA)},
+ build_dfa(Us1, N1, [M|Ms], NFA);
+build_dfa([], N, Ms, NFA) -> Ms.
+
+%% build_dfa(Char, [NfaState], [Unmarked], NextState, [Transition], [Marked], NFA) ->
+%% [Marked].
+%% For each NFA state set calculate the legal transitions. N.B. must
+%% search *BOTH* the unmarked and marked lists to check if DFA state
+%% already exists. By testing characters downwards and prepending
+%% transitions we get the transition lists in ascending order.
+
+build_dfa(C, Set, Us, N, Ts, Ms, NFA) when C >= 0 ->
+ case eclosure(move(Set, C, NFA), NFA) of
+ S when S /= [] ->
+ case keysearch(S, #dfa_state.nfa, Us) of
+ {value,#dfa_state{no=T}} ->
+ build_dfa(C-1, Set, Us, N, [{C,T}|Ts], Ms, NFA);
+ false ->
+ case keysearch(S, #dfa_state.nfa, Ms) of
+ {value,#dfa_state{no=T}} ->
+ build_dfa(C-1, Set, Us, N, [{C,T}|Ts], Ms, NFA);
+ false ->
+ U = #dfa_state{no=N,nfa=S},
+ build_dfa(C-1, Set, [U|Us], N+1, [{C,N}|Ts], Ms, NFA)
+ end
+ end;
+ [] ->
+ build_dfa(C-1, Set, Us, N, Ts, Ms, NFA)
+ end;
+build_dfa(-1, Set, Us, N, Ts, Ms, NFA) ->
+ {Ts,Us,N}.
+
+%% eclosure([State], NFA) -> [State].
+%% move([State], Char, NFA) -> [State].
+%% These are straight out of the book. As eclosure uses ordsets then
+%% the generated state sets are in ascending order.
+
+eclosure(Sts, NFA) -> eclosure(Sts, NFA, []).
+
+eclosure([St|Sts], NFA, Ec) ->
+ #nfa_state{edges=Es} = element(St, NFA),
+ eclosure([ N || {epsilon,N} <- Es,
+ not is_element(N, Ec) ] ++ Sts,
+ NFA, add_element(St, Ec));
+eclosure([], NFA, Ec) -> Ec.
+
+move(Sts, C, NFA) ->
+ [St || N <- Sts,
+ {C1,St} <- (element(N, NFA))#nfa_state.edges,
+ list(C1),
+ member(C, C1) ].
+
+%% accept([State], NFA) -> {accept,A} | noaccept.
+%% Scan down the state list until we find an accepting state.
+
+accept([St|Sts], NFA) ->
+ case element(St, NFA) of
+ #nfa_state{accept={accept,A}} -> {accept,A};
+ #nfa_state{accept=noaccept} -> accept(Sts, NFA)
+ end;
+accept([], NFA) -> noaccept.
+
+%% minimise_dfa(DFA, DfaFirst) -> {DFA,DfaFirst}.
+%% Minimise the DFA by removing equivalent states. We consider two
+%% states equivalent if their transitions and their accept actions
+%% are the same. First repeatedly run through the DFA state list removing
+%% equivalent states and updating remaining transitions with
+%% remaining equivalent state numbers. When no more reductions are
+%% possible then pack the remaining state numbers to get consecutive
+%% states.
+
+minimise_dfa(DFA0, Df0) ->
+ case min_dfa(DFA0) of
+ {DFA1,[]} -> %No reduction!
+ {DFA2,Rs} = pack_dfa(DFA1),
+ {min_update(DFA2, Rs),min_use(Df0, Rs)};
+ {DFA1,Rs} ->
+ minimise_dfa(min_update(DFA1, Rs), min_use(Df0, Rs))
+ end.
+
+min_dfa(DFA) -> min_dfa(DFA, [], []).
+
+min_dfa([D|DFA0], Rs0, MDFA) ->
+ {DFA1,Rs1} = min_delete(DFA0, D#dfa_state.trans, D#dfa_state.accept,
+ D#dfa_state.no, Rs0, []),
+ min_dfa(DFA1, Rs1, [D|MDFA]);
+min_dfa([], Rs, MDFA) -> {MDFA,Rs}.
+
+min_delete([#dfa_state{no=N,trans=T,accept=A}|DFA], T, A, NewN, Rs, MDFA) ->
+ min_delete(DFA, T, A, NewN, [{N,NewN}|Rs], MDFA);
+min_delete([D|DFA], T, A, NewN, Rs, MDFA) ->
+ min_delete(DFA, T, A, NewN, Rs, [D|MDFA]);
+min_delete([], T, A, NewN, Rs, MDFA) -> {MDFA,Rs}.
+
+min_update(DFA, Rs) ->
+ [ D#dfa_state{trans=min_update_trans(D#dfa_state.trans, Rs)} || D <- DFA ].
+
+min_update_trans(Tr, Rs) ->
+ [ {C,min_use(S, Rs)} || {C,S} <- Tr ].
+
+min_use(Old, [{Old,New}|Reds]) -> New;
+min_use(Old, [R|Reds]) -> min_use(Old, Reds);
+min_use(Old, []) -> Old.
+
+pack_dfa(DFA) -> pack_dfa(DFA, 0, [], []).
+
+pack_dfa([D|DFA], NewN, Rs, PDFA) ->
+ pack_dfa(DFA, NewN+1, [{D#dfa_state.no,NewN}|Rs], [D#dfa_state{no=NewN}|PDFA]);
+pack_dfa([], NewN, Rs, PDFA) -> {PDFA,Rs}.
+
+%% out_file(FileName, ModName, DFA, DfaStart, [Action], Code) -> ok | error.
+
+out_file(OutFile, Mod, DFA, DF, Actions, Code) ->
+ io:fwrite("Writing file ~s, ", [OutFile]),
+ case file:open("bic_leex.hrl", [read]) of
+ {ok,Ifile} ->
+ case file:open(OutFile, write) of
+ {ok,Ofile} ->
+ out_file(Ifile, Ofile, Mod, DFA, DF, Actions, Code),
+ file:close(Ifile),
+ file:close(Ofile),
+ io:fwrite("ok~n"),
+ ok;
+ {error,E} ->
+ file:close(Ifile),
+ io:fwrite("open error~n"),
+ error
+ end;
+ {error,R} ->
+ io:fwrite("open error~n"),
+ error
+ end.
+
+%% out_file(IncFile, OutFile, ModName, DFA, DfaStart, Actions, Code) -> ok.
+%% Copy the include file line by line substituting special lines with
+%% generated code. We cheat by only looking at the first 5
+%% characters.
+
+out_file(Ifile, Ofile, Mod, DFA, DF, Actions, Code) ->
+ case io:get_line(Ifile, leex) of
+ eof -> ok;
+ Line ->
+ case string:substr(Line, 1, 5) of
+ "##mod" -> io:fwrite(Ofile, "-module('~s').~n", [Mod]);
+ "##cod" -> io:put_chars(Ofile, Code);
+ "##dfa" -> out_dfa(Ofile, DFA, DF);
+ "##act" -> out_actions(Ofile, Actions);
+ Other -> io:put_chars(Ofile, Line)
+ end,
+ out_file(Ifile, Ofile, Mod, DFA, DF, Actions, Code)
+ end.
+
+out_dfa(File, DFA, DF) ->
+ io:fwrite(File, "yystate() -> ~w.~n~n", [DF]),
+ foreach(fun (S) -> out_trans(File, S) end, DFA),
+ io:fwrite(File, "yystate(S, Ics, Line, Tlen, Action, Alen) ->~n", []),
+ io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,S}.~n~n", []).
+
+out_trans(File, #dfa_state{no=N,trans=[],accept={accept,A}}) ->
+ io:fwrite(File, "yystate(~w, Ics, Line, Tlen, Action, Alen) ->~n", [N]),
+ io:fwrite(File, " {~w,Tlen,Ics,Line};~n", [A]);
+out_trans(File, #dfa_state{no=N,trans=Tr,accept={accept,A}}) ->
+ foreach(fun (T) -> out_tran(File, N, A, T) end, pack_trans(Tr)),
+ io:fwrite(File, "yystate(~w, Ics, Line, Tlen, Action, Alen) ->~n", [N]),
+ io:fwrite(File, " {~w,Tlen,Ics,Line,~w};~n", [A,N]);
+out_trans(File, #dfa_state{no=N,trans=Tr,accept=noaccept}) ->
+ foreach(fun (T) -> out_tran(File, N, T) end, pack_trans(Tr)),
+ io:fwrite(File, "yystate(~w, Ics, Line, Tlen, Action, Alen) ->~n", [N]),
+ io:fwrite(File, " {Action,Alen,Tlen,Ics,Line,~w};~n", [N]).
+
+out_tran(File, N, A, {{Cf,Cl},S}) ->
+ out_head(File, N, io_lib:write_char(Cf), io_lib:write_char(Cl)),
+ out_body(File, S, "Line", "C", A);
+out_tran(File, N, A, {$\n,S}) ->
+ out_head(File, N, "$\\n"),
+ out_body(File, S, "Line+1", "$\\n", A);
+out_tran(File, N, A, {C,S}) ->
+ Char = io_lib:write_char(C),
+ out_head(File, N, Char),
+ out_body(File, S, "Line", Char, A).
+
+out_tran(File, N, {{Cf,Cl},S}) ->
+ out_head(File, N, io_lib:write_char(Cf), io_lib:write_char(Cl)),
+ out_body(File, S, "Line", "C");
+out_tran(File, N, {$\n,S}) ->
+ out_head(File, N, "$\\n"),
+ out_body(File, S, "Line+1", "$\\n");
+out_tran(File, N, {C,S}) ->
+ Char = io_lib:write_char(C),
+ out_head(File, N, Char),
+ out_body(File, S, "Line", Char).
+
+out_head(File, State, Char) ->
+ io:fwrite(File, "yystate(~w, [~s|Ics], Line, Tlen, Action, Alen) ->\n",
+ [State,Char]).
+
+out_head(File, State, Min, Max) ->
+ io:fwrite(File, "yystate(~w, [C|Ics], Line, Tlen, Action, Alen) when C >= ~s, C =< ~s ->\n",
+ [State,Min,Max]).
+
+out_body(File, Next, Line, C, Action) ->
+ io:fwrite(File, " yystate(~w, Ics, ~s, Tlen+1, ~w, Tlen);\n",
+ [Next,Line,Action]).
+
+out_body(File, Next, Line, C) ->
+ io:fwrite(File, " yystate(~w, Ics, ~s, Tlen+1, Action, Alen);\n",
+ [Next,Line]).
+
+%% pack_trans([{Char,State}]) -> [{Crange,State}] when
+%% Crange = {Char,Char} | Char.
+%% Pack the transition table into something more suitable for
+%% generating code. Ranges of characters with the same State are
+%% packed together, while solitary characters are left "as is". We
+%% KNOW how the pattern matching compiler works so solitary
+%% characters are stored before ranges. We do this using ordsets
+%% for the packed table. Always break out $\n as a solitary character.
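+%% For example (a sketch):
+%%   pack_trans([{$a,1},{$b,1},{$c,1},{$x,2}])
+%% gives [{$x,2},{{$a,$c},1}]: the run a..c becomes a range while the
+%% solitary $x sorts before it in the ordset.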
+
+pack_trans([{C,S}|Tr]) -> pack_trans(Tr, C, C, S, []);
+pack_trans([]) -> [].
+
+pack_trans([{$\n,S1}|Tr], Cf, Cl, S, Pt) ->
+ pack_trans(Cf, Cl, S, add_element({$\n,S1}, pack_trans(Tr)));
+pack_trans([{C,S}|Tr], Cf, Cl, S, Pt) when C == Cl + 1 ->
+ pack_trans(Tr, Cf, C, S, Pt);
+pack_trans([{C,S1}|Tr], Cf, Cl, S, Pt) ->
+ pack_trans(Tr, C, C, S1, pack_trans(Cf, Cl, S, Pt));
+pack_trans([], Cf, Cl, S, Pt) -> pack_trans(Cf, Cl, S, Pt).
+
+pack_trans(Cf, Cf, S, Pt) -> add_element({Cf,S}, Pt);
+pack_trans(Cf, Cl, S, Pt) when Cl == Cf + 1 ->
+ add_element({Cf,S}, add_element({Cl,S}, Pt));
+pack_trans(Cf, Cl, S, Pt) -> add_element({{Cf,Cl},S}, Pt).
+
+out_actions(File, As) ->
+ foreach(fun (A) -> out_action(File, A) end, As),
+ io:fwrite(File, "yyaction(_, _, _, _) -> error.~n", []).
+
+out_action(File, {A,empty_action}) ->
+ io:fwrite(File, "yyaction(~w, YYlen, YYtcs, YYline) -> skip_token;~n", [A]);
+out_action(File, {A,Code,YYtext}) ->
+ io:fwrite(File, "yyaction(~w, YYlen, YYtcs, YYline) ->~n", [A]),
+ if
+ YYtext == true ->
+ io:fwrite(File, " YYtext = yypre(YYtcs, YYlen),~n", []);
+ YYtext == false -> ok
+ end,
+ io:fwrite(File, " ~s;~n", [erl_pp:exprs(Code, 4, none)]).
210 lib/bic/src/bic_leex.hrl
@@ -0,0 +1,210 @@
+%% THIS IS A PRE-RELEASE OF LEEX - RELEASED ONLY BECAUSE MANY PEOPLE
+%% WANTED IT - THE OFFICIAL RELEASE WILL PROVIDE A DIFFERENT INCOMPATIBLE
+%% AND BETTER INTERFACE - BE WARNED
+%% PLEASE REPORT ALL BUGS TO THE AUTHOR.
+
+##module
+
+-export([string/1,string/2,token/2,token/3,tokens/2,tokens/3]).
+-export([format_error/1]).
+
+%% User code. This is placed here to allow extra attributes.
+##code
+
+format_error({illegal,S}) -> ["illegal characters ",io_lib:write_string(S)];
+format_error({user,S}) -> S.
+
+string(String) -> string(String, 1).
+
+string(String, Line) -> string(String, Line, String, []).
+
+%% string(InChars, Line, TokenChars, Tokens) ->
+%% {ok,Tokens,Line} | {error,ErrorInfo,Line}.
+
+string([], L, [], Ts) -> %No partial tokens!
+ {ok,yyrev(Ts),L};
+string(Ics0, L0, Tcs, Ts) ->
+ case yystate(yystate(), Ics0, L0, 0, reject, 0) of
+ {A,Alen,Ics1,L1} -> %Accepting end state
+ string_cont(Ics1, L1, yyaction(A, Alen, Tcs, L1), Ts);
+ {A,Alen,Ics1,L1,S1} -> %After an accepting state
+ string_cont(Ics1, L1, yyaction(A, Alen, Tcs, L1), Ts);
+ {reject,Alen,Tlen,Ics1,L1,S1} ->
+ {error,{L1,?MODULE,{illegal,yypre(Tcs, Tlen+1)}},L1};
+ {A,Alen,Tlen,Ics1,L1,S1} ->
+ string_cont(yysuf(Tcs, Alen), L1, yyaction(A, Alen, Tcs, L1), Ts)
+ end.
+
+%% string_cont(RestChars, Line, Token, Tokens)
+%% Test for and remove the end token wrapper.
+
+string_cont(Rest, Line, {token,T}, Ts) ->
+ string(Rest, Line, Rest, [T|Ts]);
+string_cont(Rest, Line, {end_token,T}, Ts) ->
+ string(Rest, Line, Rest, [T|Ts]);
+string_cont(Rest, Line, skip_token, Ts) ->
+ string(Rest, Line, Rest, Ts);
+string_cont(Rest, Line, {error,S}, Ts) ->
+ {error,{Line,?MODULE,{user,S}},Line}.
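+
+%% Sketch of how a generated scanner module (call it Mod) is used:
+%%   Mod:string("int x;") ->
+%%       {ok,Tokens,EndLine} | {error,ErrorInfo,EndLine}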
+
+%% token(Continuation, Chars, Line) ->
+%% {more,Continuation} | {done,ReturnVal,RestChars}.
+%% Must be careful when re-entering to append the latest characters to the
+%% after characters in an accept.
+
+token(Cont, Chars) -> token(Cont, Chars, 1).
+
+token([], Chars, Line) ->
+ token(Chars, Line, yystate(), Chars, 0, reject, 0);
+token({Line,State,Tcs,Tlen,Action,Alen}, Chars, _) ->
+ token(Chars, Line, State, Tcs ++ Chars, Tlen, Action, Alen).
+
+%% token(InChars, Line, State, TokenChars, TokenLen, Accept) ->
+%% {more,Continuation} | {done,ReturnVal,RestChars}.
+
+token(Ics0, L0, S0, Tcs, Tlen0, A0, Alen0) ->
+ case yystate(S0, Ics0, L0, Tlen0, A0, Alen0) of
+ {A1,Alen1,Ics1,L1} -> %Accepting end state
+ token_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, L1));
+ {A1,Alen1,[],L1,S1} -> %After an accepting state
+ {more,{L1,S1,Tcs,Alen1,A1,Alen1}};
+ {A1,Alen1,Ics1,L1,S1} ->
+ token_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, L1));
+ {A1,Alen1,Tlen1,[],L1,S1} -> %After a non-accepting state
+ {more,{L1,S1,Tcs,Tlen1,A1,Alen1}};
+ {reject,Alen1,Tlen1,eof,L1,S1} ->
+ {done,{eof,L1},[]};
+ {reject,Alen1,Tlen1,Ics1,L1,S1} ->
+ {done,{error,{L1,?MODULE,{illegal,yypre(Tcs, Tlen1+1)}},L1},Ics1};
+ {A1,Alen1,Tlen1,Ics1,L1,S1} ->
+ token_cont(yysuf(Tcs, Alen1), L1, yyaction(A1, Alen1, Tcs, L1))
+ end.
+
+%% token_cont(RestChars, Line, Token)
+%% Test if we have detected the end token, if so return done else continue.
+
+token_cont(Rest, Line, {token,T}) ->
+ {done,{ok,T,Line},Rest};
+token_cont(Rest, Line, {end_token,T}) ->
+ {done,{ok,T,Line},Rest};
+token_cont(Rest, Line, skip_token) ->
+ token(Rest, Line, yystate(), Rest, 0, reject, 0);
+token_cont(Rest, Line, {error,S}) ->
+ {done,{error,{Line,?MODULE,{user,S}},Line},Rest}.
+
+%% tokens(Continuation, Chars, Line) ->
+%% {more,Continuation} | {done,ReturnVal,RestChars}.
+%% Must be careful when re-entering to append the latest characters to the
+%% after characters in an accept.
+
+tokens(Cont, Chars) -> tokens(Cont, Chars, 1).
+
+tokens([], Chars, Line) ->
+ tokens(Chars, Line, yystate(), Chars, 0, [], reject, 0);
+tokens({tokens,Line,State,Tcs,Tlen,Ts,Action,Alen}, Chars, _) ->
+ tokens(Chars, Line, State, Tcs ++ Chars, Tlen, Ts, Action, Alen);
+tokens({skip_tokens,Line,State,Tcs,Tlen,Error,Action,Alen}, Chars, _) ->
+ skip_tokens(Chars, Line, State, Tcs ++ Chars, Tlen, Error, Action, Alen).
+
+%% tokens(InChars, Line, State, TokenChars, TokenLen, Tokens, Accept) ->
+%% {more,Continuation} | {done,ReturnVal,RestChars}.
+
+tokens(Ics0, L0, S0, Tcs, Tlen0, Ts, A0, Alen0) ->
+ case yystate(S0, Ics0, L0, Tlen0, A0, Alen0) of
+ {A1,Alen1,Ics1,L1} -> %Accepting end state
+ tokens_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, L1), Ts);
+ {A1,Alen1,[],L1,S1} -> %After an accepting state
+ {more,{tokens,L1,S1,Tcs,Alen1,Ts,A1,Alen1}};
+ {A1,Alen1,Ics1,L1,S1} ->
+ tokens_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, L1), Ts);
+ {A1,Alen1,Tlen1,[],L1,S1} -> %After a non-accepting state
+ {more,{tokens,L1,S1,Tcs,Tlen1,Ts,A1,Alen1}};
+ {reject,Alen1,Tlen1,eof,L1,S1} ->
+ {done,if Ts == [] -> {eof,L1};
+ true -> {ok,yyrev(Ts),L1} end,[]};
+ {reject,Alen1,Tlen1,Ics1,L1,S1} ->
+ skip_tokens(yysuf(Tcs, Tlen1+1), L1,
+ {L1,?MODULE,{illegal,yypre(Tcs, Tlen1+1)}});
+ {A1,Alen1,Tlen1,Ics1,L1,S1} ->
+ tokens_cont(yysuf(Tcs, Alen1), L1, yyaction(A1, Alen1, Tcs, L1), Ts)
+ end.
+
+%% tokens_cont(RestChars, Line, Token, Tokens)
+%% Test if we have detected the end token, if so return done else continue.
+
+tokens_cont(Rest, Line, {token,T}, Ts) ->
+ tokens(Rest, Line, yystate(), Rest, 0, [T|Ts], reject, 0);
+tokens_cont(Rest, Line, {end_token,T}, Ts) ->
+ {done,{ok,yyrev(Ts, [T]),Line},Rest};
+tokens_cont(Rest, Line, skip_token, Ts) ->
+ tokens(Rest, Line, yystate(), Rest, 0, Ts, reject, 0);
+tokens_cont(Rest, Line, {error,S}, Ts) ->
+ skip_tokens(Rest, Line, {Line,?MODULE,{user,S}}).
+
+%% skip_tokens(InChars, Line, Error) -> {done,ReturnVal,RestChars}.
+%% Skip tokens until an end token, junk everything and return the error.
+
+%%skip_tokens(Ics, Line, Error) -> {done,{error,Error,Line},Ics}.
+
+skip_tokens(Ics, Line, Error) ->
+ skip_tokens(Ics, Line, yystate(), Ics, 0, Error, reject, 0).
+
+%% skip_tokens(InChars, Line, State, TokenChars, TokenLen, Error, Accept) ->
+%% {more,Continuation} | {done,ReturnVal,RestChars}.
+
+skip_tokens(Ics0, L0, S0, Tcs, Tlen0, Error, A0, Alen0) ->
+ case yystate(S0, Ics0, L0, Tlen0, A0, Alen0) of
+ {A1,Alen1,Ics1,L1} -> %Accepting end state
+ skip_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, L1), Error);
+ {A1,Alen1,[],L1,S1} -> %After an accepting state
+ {more,{skip_tokens,L1,S1,Tcs,Alen1,Error,A1,Alen1}};
+ {A1,Alen1,Ics1,L1,S1} ->
+ skip_cont(Ics1, L1, yyaction(A1, Alen1, Tcs, L1), Error);
+ {A1,Alen1,Tlen1,[],L1,S1} -> %After a non-accepting state
+ {more,{skip_tokens,L1,S1,Tcs,Tlen1,Error,A1,Alen1}};
+ {reject,Alen1,Tlen1,eof,L1,S1} ->
+ {done,{error,Error,L1},[]};
+ {reject,Alen1,Tlen1,Ics1,L1,S1} ->
+ skip_tokens(yysuf(Tcs, Tlen1), L1, Error);
+ {A1,Alen1,Tlen1,Ics1,L1,S1} ->
+ skip_cont(yysuf(Tcs, Alen1), L1, yyaction(A1, Alen1, Tcs, L1), Error)
+ end.
+
+%% skip_cont(RestChars, Line, Token, Error)
+%% Test if we have detected the end token, if so return done else continue.
+
+skip_cont(Rest, Line, {token,T}, Error) ->
+ skip_tokens(Rest, Line, yystate(), Rest, 0, Error, reject, 0);
+skip_cont(Rest, Line, {end_token,T}, Error) ->
+ {done,{error,Error,Line},Rest};
+skip_cont(Rest, Line, {error,S}, Error) ->