Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Remove a bunch of unused code (and convert from reg.erl to re module)

  • Loading branch information...
commit 543f6a780a86c1ef51e424f9d1fea169cfe9650c 1 parent 90fdbb7
Andrew Thompson authored
10  elibs/conf.erl
@@ -10,9 +10,8 @@ read_conf(Conf) ->
10 10
 
11 11
 convert_path(Host, Path) ->
12 12
   [{Host, {Regex, Transform}}] = ets:lookup(db, Host),
13  
-  case reg:smatch(Path, Regex) of
14  
-    {match, _A, _B, _C, MatchesTuple} ->
15  
-      Matches = tuple_to_list(MatchesTuple),
  13
+	case re:run(Path, Regex, [{capture, all_but_first, list}]) of
  14
+    {match, Matches} ->
16 15
       Binding = create_binding(Matches),
17 16
       % io:format("binding = ~p~n", [Binding]),
18 17
       eval_erlang_expr(Transform, Binding);
@@ -27,9 +26,8 @@ parse_conf_line(Line) ->
27 26
   ets:insert(db, {Host, {Regex, Transform}}).
28 27
 
29 28
 create_binding(Matches) ->
30  
-  Modder = fun(M, Acc) ->
  29
+  Modder = fun(Word, Acc) ->
31 30
     {I, Arr} = Acc,
32  
-    {_A, _B, Word} = M,
33 31
     Mod = {I, Word},
34 32
     {I + 1, lists:append(Arr, [Mod])}
35 33
   end,
@@ -67,4 +65,4 @@ md5_namespace3(Name) ->
67 65
   
68 66
 hexmod8(Name) ->
69 67
   <<A:4, _:124>> = erlang:md5(Name),
70  
-  integer_to_list(A rem 8).
  68
+  integer_to_list(A rem 8).
98  elibs/pipe.erl
... ...
@@ -1,98 +0,0 @@
1  
-% pipe.erl
2  
-%
3  
-% This module implements a pipe data structure. This pipe implementation is
4  
-% designed as a fifo for bytes. You write bytes *to* the pipe and then can
5  
-% read those same bytes *from* the pipe. This is useful when dealing with
6  
-% chunked data from an external port. All of the chunked data can be written
7  
-% to the pipe and then you can read specific numbers of bytes from the pipe.
8  
-% This is necessary if you wish to do your own packet length management.
9  
-%
10  
-% (The MIT License)
11  
-% 
12  
-% Copyright (c) 2008 Tom Preston-Werner
13  
-% 
14  
-% Permission is hereby granted, free of charge, to any person obtaining
15  
-% a copy of this software and associated documentation files (the
16  
-% 'Software'), to deal in the Software without restriction, including
17  
-% without limitation the rights to use, copy, modify, merge, publish,
18  
-% distribute, sublicense, and/or sell copies of the Software, and to
19  
-% permit persons to whom the Software is furnished to do so, subject to
20  
-% the following conditions:
21  
-% 
22  
-% The above copyright notice and this permission notice shall be
23  
-% included in all copies or substantial portions of the Software.
24  
-% 
25  
-% THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
26  
-% EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
27  
-% MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
28  
-% IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
29  
-% CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
30  
-% TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
31  
-% SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
32  
-
33  
--module(pipe).
34  
--export([new/0, write/2, read/2, peek/2, size/1]).
35  
-
36  
--record(pipe, {pos = 0, size = 0, queue = queue:new()}).
37  
-
38  
-new() ->
39  
-  #pipe{}.
40  
-
41  
-write(Bin, Pipe) ->
42  
-  #pipe{size = Size, queue = Q} = Pipe,
43  
-  {ok, Pipe#pipe{size = Size + erlang:size(Bin), queue = queue:in(Bin, Q)}}.
44  
-  
45  
-read(Num, Pipe) ->
46  
-  #pipe{size = Size, queue = Q1} = Pipe,
47  
-  case Num =< Size of
48  
-    true ->
49  
-      {Acc, Q2} = read_internal([], Num, Q1),
50  
-      Bin = list_to_binary(Acc),
51  
-      P2 = Pipe#pipe{size = Size - Num, queue = Q2},
52  
-      {ok, Bin, P2};
53  
-    false ->
54  
-      eof
55  
-  end.
56  
-
57  
-peek(Num, Pipe) ->
58  
-  #pipe{size = Size, queue = Q1} = Pipe,
59  
-  case Num =< Size of
60  
-    true ->
61  
-      {Acc, _} = read_internal([], Num, Q1),
62  
-      Bin = list_to_binary(Acc),
63  
-      {ok, Bin};
64  
-    false ->
65  
-      eof
66  
-  end.
67  
-      
68  
-read_internal(Acc, Num, Q1) ->
69  
-  {{value, Bin}, Q2} = queue:out(Q1),
70  
-  Size = erlang:size(Bin),
71  
-  case spaceship(Num, Size) of
72  
-    -1 ->
73  
-      {B1, B2} = split_binary(Bin, Num),
74  
-      Q3 = queue:in_r(B2, Q2),
75  
-      Acc2 = lists:append(Acc, [B1]),
76  
-      {Acc2, Q3};
77  
-    0 ->
78  
-      Acc2 = lists:append(Acc, [Bin]),
79  
-      {Acc2, Q2};
80  
-    1 ->
81  
-      Acc2 = lists:append(Acc, [Bin]),
82  
-      read_internal(Acc2, Num - Size, Q2)
83  
-  end. 
84  
-      
85  
-size(Pipe) ->
86  
-  Pipe#pipe.size.
87  
-      
88  
-% util
89  
-
90  
-spaceship(A, B) ->
91  
-  case A =< B of
92  
-    true ->
93  
-      case A < B of
94  
-        true -> -1;
95  
-        false -> 0
96  
-      end;
97  
-    false -> 1
98  
-  end.
6  elibs/receive_pack.erl
... ...
@@ -1,6 +0,0 @@
1  
--module(receive_pack).
2  
--export([handle/3]).
3  
-
4  
-handle(Sock, _Host, _Header) ->
5  
-  gen_tcp:send(Sock, "006d\n*********'\n\nYou can't push to git://github.com/user/repo.git\nUse git@github.com:user/repo.git\n\n*********"),
6  
-  ok = gen_tcp:close(Sock).
1,344  elibs/reg.erl
... ...
@@ -1,1344 +0,0 @@
1  
-%% re4 => Upgrade to needed state, checking speed as we go
2  
-%%      0) fixed interval code and copying
3  
-%%      1) upgrade to next_match_XXX (+ slight improvement)
4  
-%%      2) put pos+char in global state arg (no noticeable difference)
5  
-%%      3) get look-ahead character for proper eol (- 5-10% slower)
6  
-
7  
--module(reg).
8  
-
9  
--export([parse/1,match/2,first_match/2,matches/2,sub/3,gsub/3,split/2]).
10  
--export([smatch/2,first_smatch/2]).
11  
--export([tt/2,loadf/1]).
12  
-
13  
--import(string, [substr/2,substr/3]).
14  
--import(lists, [reverse/1,reverse/2,last/1,duplicate/2,seq/2]).
15  
--import(lists, [member/2,sort/1,keysearch/3,keysort/2,keydelete/3]).
16  
--import(lists, [map/2,foldl/3]).
17  
--import(ordsets, [is_element/2,add_element/2,union/2,subtract/2]).
18  
-
19  
-%%-compile([export_all]).
20  
-
21  
-%%-define(TP(F,As), io:fwrite(F, As)).
22  
-%%-define(TP(F,As), begin {F,As}, ok end).
23  
--define(TP(F,As), ok).
24  
-
25  
-%% NFA states
26  
-%% State type defines type of transition from the state.
27  
-%% N.B. all types have the id in the same field and all types except
28  
-%% estate have the next state pointer in the same field. This is an
29  
-%% invariant and is used in the code!
30  
-
31  
--record(cstate, {id,c,s}).			%Character state
32  
--record(nstate, {id,cc,s}).			%Character class state
33  
--record(estate, {id,s1,s2}).			%Epsilon state
34  
--record(lstate, {id,n,s}).			%Parentheses states
35  
--record(rstate, {id,n,s}).
36  
--record(pstate, {id,t,s}).			%Position states
37  
-
38  
-%% This is the regular expression grammar used. It is equivalent to the
39  
-%% one used in AWK, except that we allow ^ $ to be used anywhere and fail
40  
-%% in the matching.
41  
-%%
42  
-%% reg -> reg1 : '$1'.
43  
-%% reg1 -> reg1 "|" reg2 : {'or','$1','$2'}.
44  
-%% reg1 -> reg2 : '$1'.
45  
-%% reg2 -> reg2 reg3 : {concat,'$1','$2'}.
46  
-%% reg2 -> reg3 : '$1'.
47  
-%% reg3 -> reg3 "*" : {kclosure,'$1'}.
48  
-%% reg3 -> reg3 "+" : {pclosure,'$1'}.
49  
-%% reg3 -> reg3 "?" : {optional,'$1'}.
50  
-%% reg3 -> reg3 "{" [Min],[Max] "}" : {closure_range, Num, '$1'} see below
51  
-%% reg3 -> reg4 : '$1'.
52  
-%% reg4 -> "(" reg ")" : '$2'.
53  
-%% reg4 -> "\\" char : '$2'.
54  
-%% reg4 -> "^" : bos.
55  
-%% reg4 -> "$" : eos.
56  
-%% reg4 -> "." : char.
57  
-%% reg4 -> "[" class "]" : {char_class,char_class('$2')}
58  
-%% reg4 -> "[" "^" class "]" : {comp_class,char_class('$3')}
59  
-%% reg4 -> "\"" chars "\"" : char_string('$2')
60  
-%% reg4 -> char : '$1'.
61  
-%% reg4 -> empty : epsilon.
62  
-%%  The grammar of the current regular expressions. The actual parser
63  
-%%  is a recursive descent implementation of the grammar.
64  
-
65  
-%% reg(String, NFA, NextState, SubCount) ->
66  
-%%      {Frame,NFA,NewNextState,NewSubCount,RestString}.
67  
-%% Frame = {BegState,[EndState]}.
68  
-
69  
-reg(Cs0) ->
70  
-    {F,Nfa0,N0,Sc,Cs1} = reg(Cs0, [], 1, 1),
71  
-    Nfa1 = [#cstate{id=N0,c=done}|Nfa0],
72  
-    {{Start,_},Nfa2,_} = concat(F, {N0,[N0]}, Nfa1, N0),
73  
-    {ok,{list_to_tuple(keysort(#nstate.id, Nfa2)),Start,Sc-1},Cs1}.
74  
-
75  
-reg(Cs, Nfa, N, Sc) -> reg1(Cs, Nfa, N, Sc).
76  
-
77  
-%% reg1 -> reg2 reg1'
78  
-%% reg1' -> "|" reg2 reg1'
79  
-%% reg1' -> empty
80  
-
81  
-reg1(Cs0, Nfa0, N0, Sc0) ->
82  
-    {F,Nfa1,N1,Sc1,Cs1} = reg2(Cs0, Nfa0, N0, Sc0),
83  
-    reg1p(Cs1, F, Nfa1, N1, Sc1).
84  
-
85  
-reg1p([$||Cs0], Lf, Nfa0, N0, Sc0) ->
86  
-    {Rf,Nfa1,N1,Sc1,Cs1} = reg2(Cs0, Nfa0, N0, Sc0),
87  
-    {F,Nfa2,N2} = alt(Lf, Rf, Nfa1, N1),
88  
-    reg1p(Cs1, F, Nfa2, N2, Sc1);
89  
-reg1p(Cs, F, Nfa, N, Sc) -> {F,Nfa,N,Sc,Cs}.
90  
-
91  
-%% reg2 -> reg3 reg2'
92  
-%% reg2' -> reg3 reg2'
93  
-%% reg2' -> empty
94  
-
95  
-reg2(S0, Nfa0, N0, Sc0) ->
96  
-    {F,Nfa1,N1,Sc1,S1} = reg3(S0, Nfa0, N0, Sc0),
97  
-    reg2p(S1, F, Nfa1, N1, Sc1).
98  
-
99  
-reg2p([C|_]=Cs0, Lf, Nfa0, N0, Sc0) when C /= $|, C /= $) ->
100  
-    {Rf,Nfa1,N1,Sc1,Cs1} = reg3(Cs0, Nfa0, N0, Sc0),
101  
-    {F,Nfa2,N2} = concat(Lf, Rf, Nfa1, N1),
102  
-    reg2p(Cs1, F, Nfa2, N2, Sc1);
103  
-reg2p(Cs, F, Nfa, N, Sc) -> {F,Nfa,N,Sc,Cs}.
104  
-
105  
-%% reg3 -> reg4 reg3'
106  
-%% reg3' -> "*" reg3'
107  
-%% reg3' -> "+" reg3'
108  
-%% reg3' -> "?" reg3'
109  
-%% reg3' -> "{" [Min],[Max] "}" reg3'
110  
-%% reg3' -> empty
111  
-
112  
-reg3(Cs0, Nfa0, N0, Sc0) ->
113  
-    {F,Nfa1,N1,Sc1,Cs1} = reg4(Cs0, Nfa0, N0, Sc0),
114  
-    reg3p(Cs1, F, Nfa1, N1, Sc1).
115  
-
116  
-reg3p([$*|Cs], Lf, Nfa0, N0, Sc) ->
117  
-    {F,Nfa1,N1} = kclosure(Lf, Nfa0, N0),
118  
-    reg3p(Cs, F, Nfa1, N1, Sc);
119  
-reg3p([$+|Cs], Lf, Nfa0, N0, Sc) ->
120  
-    {F,Nfa1,N1} = pclosure(Lf, Nfa0, N0),
121  
-    reg3p(Cs, F, Nfa1, N1, Sc);
122  
-reg3p([$?|Cs], Lf, Nfa0, N0, Sc) ->
123  
-    {F,Nfa1,N1} = optional(Lf, Nfa0, N0),
124  
-    reg3p(Cs, F, Nfa1, N1, Sc);
125  
-reg3p([${|Cs0], Lf, Nfa0, N0, Sc) ->			% $}
126  
-    %% Have many special case so as not to create unnecessary new states.
127  
-    case interval_range(Cs0) of
128  
-	{0,0,[$}|Cs1]} ->			%This is a null op!
129  
-	    %% The states have been created but will never be referenced!
130  
-	    {Nfa1,N1} = delete(Lf, Nfa0, N0),
131  
- 	    reg3p(Cs1, epsilon, Nfa1, N1, Sc);
132  
-	{0,Max,[$}|Cs1]} when is_integer(Max) ->
133  
-	    {F,Nfa1,N1} = optional(Max, Lf, Nfa0, N0),
134  
-	    %%?TP("I2: ~w x ~w x ~w\nI2 => ~w x ~w\n", [Lf,Max,Nfa0,F,Nfa1]),
135  
-	    reg3p(Cs1, F, Nfa1, N1, Sc);
136  
-	{0,none,[$}|Cs1]} ->			%This is a null op!
137  
-	    %% The states have been created but will never be referenced!
138  
-	    {Nfa1,N1} = delete(Lf, Nfa0, N0),
139  
- 	    reg3p(Cs1, epsilon, Nfa1, N1, Sc);
140  
- 	{0,any,[$}|Cs1]} ->
141  
-	    {F1,Nfa1,N1} = kclosure(Lf, Nfa0, N0),
142  
- 	    reg3p(Cs1, F1, Nfa1, N1, Sc);
143  
-	{Min,Min,[$}|Cs1]} when is_integer(Min) ->
144  
-	    {F,Nfa1,N1} = copy_concat(Min, Lf, Nfa0, N0),
145  
-	    reg3p(Cs1, F, Nfa1, N1, Sc);
146  
- 	{Min,Max,[$}|Cs1]} when is_integer(Min), is_integer(Max), Max >= Min ->
147  
-	    {Fc,Nfa1,N1} = copy(Lf, Nfa0, N0),	%Make copy first!
148  
-	    {F0,Nfa2,N2} = copy_concat(Min, Lf, Nfa1, N1),
149  
-	    {F1,Nfa3,N3} = optional(Max-Min, Fc, Nfa2, N2),
150  
-	    {F2,Nfa4,N4} = concat(F0, F1, Nfa3, N3),
151  
-	    reg3p(Cs1, F2, Nfa4, N4, Sc);
152  
-	{Min,none,[$}|Cs1]} when is_integer(Min) ->
153  
-	    {F,Nfa1,N1} = copy_concat(Min, Lf, Nfa0, N0),
154  
-	    reg3p(Cs1, F, Nfa1, N1, Sc);
155  
- 	{Min,any,[$}|Cs1]} when is_integer(Min) ->
156  
-	    {Fc,Nfa1,N1} = copy(Lf, Nfa0, N0),	%Make copy first!
157  
-	    {F0,Nfa2,N2} = copy_concat(Min, Lf, Nfa1, N1),
158  
-	    {F1,Nfa3,N3} = kclosure(Fc, Nfa2, N2),
159  
-	    {F2,Nfa4,N4} = concat(F0, F1, Nfa3, N3),
160  
- 	    reg3p(Cs1, F2, Nfa4, N4, Sc);
161  
-	{_N,_M,_Cs1} ->				%Catches none,none as well
162  
-	    parse_error({interval_range,[${|Cs0]})
163  
-    end;
164  
-reg3p(Cs, Lf, Nfa, N, Sc) -> {Lf,Nfa,N,Sc,Cs}.
165  
-
166  
-reg4([$(,$?,$:|Cs0], Nfa0, N0, Sc0) ->		% $) A little PERLism!
167  
-    case reg(Cs0, Nfa0, N0, Sc0) of
168  
-	{R,Nfa1,N1,Sc1,[$)|Cs1]} ->
169  
-	    {R,Nfa1,N1,Sc1,Cs1};
170  
-	{_,_,_,_,_} -> parse_error({unterminated,"(?:"})
171  
-    end;
172  
-reg4([$(|Cs0], Nfa0, N0, Sc0) ->		% $)
173  
-    {Lf,Nfa1,N1} = lparen(Sc0, Nfa0, N0),
174  
-    case reg(Cs0, Nfa1, N1, Sc0+1) of
175  
-	{R,Nfa2,N2,Sc2,[$)|Cs1]} ->
176  
-	    {Sf,Nfa3,N3} = rparen(Sc0, R, Lf, Nfa2, N2),
177  
-	    {Sf,Nfa3,N3,Sc2,Cs1};
178  
-	{_,_,_,_,_} -> parse_error({unterminated,"("})
179  
-    end;
180  
-reg4([$^|Cs], Nfa0, N0, Sc) ->
181  
-    {F,Nfa1,N1} = pstate(bos, Nfa0, N0),
182  
-    {F,Nfa1,N1,Sc,Cs};
183  
-reg4([$$|Cs], Nfa0, N0, Sc) ->
184  
-    {F,Nfa1,N1} = pstate(eos, Nfa0, N0),
185  
-    {F,Nfa1,N1,Sc,Cs};
186  
-reg4([$.|Cs], Nfa0, N0, Sc) ->
187  
-    {F,Nfa1,N1} = nstate([{0,9},{11,maxchar}], Nfa0, N0),
188  
-    {F,Nfa1,N1,Sc,Cs};
189  
-reg4([$[,$^|Cs0], Nfa0, N0, Sc) ->
190  
-    case comp_class(Cs0) of
191  
-	{Cc,[$]|Cs1]} ->
192  
- 	    {F,Nfa1,N1} = nstate(Cc, Nfa0, N0),
193  
-	    {F,Nfa1,N1,Sc,Cs1};
194  
-	{_,_} -> parse_error({unterminated,"["})
195  
-    end;
196  
-reg4([$[|Cs0], Nfa0, N0, Sc) ->
197  
-    case char_class(Cs0) of
198  
-	{Cc,[$]|Cs1]} ->
199  
- 	    {F,Nfa1,N1} = nstate(Cc, Nfa0, N0),
200  
-	    {F,Nfa1,N1,Sc,Cs1};
201  
-	{_,_} -> parse_error({unterminated,"["})
202  
-    end;
203  
-reg4([C0|Cs0], Nfa0, N0, Sc) when
204  
-  is_integer(C0), C0 /= $*, C0 /= $+, C0 /= $?, C0 /= $], C0 /= $), C0 /= $} ->
205  
-    %% Handle \ quoted characters as well, at least those we see.
206  
-    {C1,Cs1} = char(C0, Cs0),			%Get the extended char
207  
-    {F,Nfa1,N1} = cstate(C1, Nfa0, N0),
208  
-    {F,Nfa1,N1,Sc,Cs1};
209  
-reg4([$)|_]=Cs, Nfa, N, Sc) -> {epsilon,Nfa,N,Sc,Cs};
210  
-reg4([C|_], _, _, _) -> parse_error({illegal,[C]});
211  
-reg4([], Nfa, N, Sc) ->
212  
-    ?TP("reg4: ~w\n", [{[],Nfa,N,Sc}]),
213  
-    {epsilon,Nfa,N,Sc,[]}.
214  
-
215  
-%%% Is {N,[]} an epsilon state? Is it safe???????
216  
-
217  
-lparen(Sc, Nfa0, N) ->
218  
-    Nfa1 = [#lstate{id=N,n=Sc}|Nfa0],
219  
-    {{N,[N]},Nfa1,N+1}.
220  
-
221  
-rparen(Sc, epsilon, {Lb,Les}, Nfa0, N) ->
222  
-    Nfa1 = patch(Nfa0, Les, N),
223  
-    Nfa2 = [#rstate{id=N,n=Sc}|Nfa1],
224  
-    {{Lb,[N]},Nfa2,N+1};
225  
-rparen(Sc, {B,Es}, {Lb,Les}, Nfa0, N) ->
226  
-    Nfa1 = patch(Nfa0, Les, B),
227  
-    Nfa2 = [#rstate{id=N,n=Sc}|Nfa1],
228  
-    Nfa3 = patch(Nfa2, Es, N),
229  
-    {{Lb,[N]},Nfa3,N+1}.
230  
-
231  
-kclosure(epsilon, Nfa, N) -> {epsilon,Nfa,N};
232  
-kclosure({B,Es}, Nfa0, N) ->
233  
-    Nfa1 = [#estate{id=N,s1=B,s2=none}|Nfa0],
234  
-    {{N,[N]},patch(Nfa1, Es, N),N+1}.
235  
-
236  
-pclosure(epsilon, Nfa, N) -> {epsilon,Nfa,N};
237  
-pclosure({B,Es}, Nfa0, N) ->
238  
-    Nfa1 = [#estate{id=N,s1=B,s2=none}|Nfa0],
239  
-    {{B,[N]},patch(Nfa1, Es, N),N+1}.
240  
-
241  
-optional(epsilon, Nfa, N) -> {epsilon,Nfa,N};
242  
-optional({B,Es}, Nfa0, N) ->
243  
-    Nfa1 = [#estate{id=N,s1=B,s2=none}|Nfa0],
244  
-    {{N,Es ++ [N]},Nfa1,N+1}.
245  
-
246  
-cstate(C, Nfa0, N) ->
247  
-    Nfa1 = [#cstate{id=N,c=C}|Nfa0],
248  
-    {{N,[N]},Nfa1,N+1}.
249  
-
250  
-nstate(Cc, Nfa0, N) ->
251  
-    Nfa1 = [#nstate{id=N,cc=Cc}|Nfa0],
252  
-    {{N,[N]},Nfa1,N+1}.
253  
-
254  
-pstate(Type, Nfa0, N) ->
255  
-    Nfa1 = [#pstate{id=N,t=Type}|Nfa0],
256  
-    {{N,[N]},Nfa1,N+1}.
257  
-
258  
-concat(epsilon, F2, Nfa, N) -> {F2,Nfa,N};
259  
-concat(F1, epsilon, Nfa, N) -> {F1,Nfa,N};
260  
-concat({B1,Es1}, {B2,Es2}, Nfa0, N) ->
261  
-    Nfa1 = patch(Nfa0, Es1, B2),
262  
-    {{B1,Es2},Nfa1,N}.
263  
-
264  
-alt(epsilon, {B2,E2}, Nfa0, N) ->
265  
-    Nfa1 = [#estate{id=N,s1=none,s2=B2}|Nfa0],
266  
-    {{N,[N|E2]},Nfa1,N+1};
267  
-alt({B1,E1}, epsilon, Nfa0, N) ->
268  
-    Nfa1 = [#estate{id=N,s1=B1,s2=none}|Nfa0],
269  
-    {{N,E1 ++ [N]},Nfa1,N+1};
270  
-alt({B1,E1}, {B2,E2}, Nfa0, N) ->
271  
-    Nfa1 = [#estate{id=N,s1=B1,s2=B2}|Nfa0],
272  
-    {{N,E1 ++ E2},Nfa1,N+1}.
273  
-
274  
-%% optional(Count, Frame, Nfa, NextFree) -> {Frame,Nfa,NextFree}.
275  
-%%  M x F => (...((F?)F)?...F)? Is this better than F?F?...F? ?
276  
-%%  Original states will be destructively included in copy.
277  
-%%  If Count == 0 then return epsilon.
278  
-
279  
-optional(M, F, Nfa0, N0) when M > 1 ->
280  
-    {F1,Nfa1,N1} = copy(F, Nfa0, N0),
281  
-    {F2,Nfa2,N2} = optional(M-1, F, Nfa1, N1),
282  
-    {F3,Nfa3,N3} = concat(F1, F2, Nfa2, N2),
283  
-    optional(F3, Nfa3, N3);
284  
-optional(1, F, Nfa, N) -> optional(F, Nfa, N);
285  
-optional(0, _, Nfa, N) -> {epsilon,Nfa,N}.
286  
-
287  
-%% copy_concat(Count, Frame, Nfa, NextFree) -> {Frame,Nfa,NextFree}.
288  
-%%  Make Count copies of sub-expression in Frame concatenated together.
289  
-%%  Original states will be destructively included in copy.
290  
-%%  If Count == 0 then return epsilon.
291  
-
292  
-copy_concat(M, F0, Nfa0, N0) when M > 1 ->
293  
-    {F1,Nfa1,N1} = copy(F0, Nfa0, N0),
294  
-    {F2,Nfa2,N2} = copy_concat(M-1, F0, Nfa1, N1),
295  
-    concat(F1, F2, Nfa2, N2);
296  
-copy_concat(1, F, Nfa, N) -> {F,Nfa,N};
297  
-copy_concat(0, _, Nfa, N) -> {epsilon,Nfa,N}.
298  
-
299  
-%% copy(Frame, Nfa, NextFree) -> {Frame,Nfa,NextFree}.
300  
-%%  Making a copy of a sub expression is a bit of a pain. We
301  
-%%  recursively descend from the start through the graph building new
302  
-%%  states as we go back up. We assume that the graph to be copied has
303  
-%%  not been already prepended to another set of states as the
304  
-%%  termination condition is a non-numeric "next state".
305  
-
306  
-copy({B0,Es}, Nfa0, N0) ->
307  
-    {B1,Nfa1,N1,D} = copy(B0, Nfa0, N0, []),
308  
-    %% Build a new list of end states from the new copies.
309  
-    Es1 = map(fun (E) -> {value,{E,E1}} = keysearch(E, 1, D), E1 end, Es),
310  
-    {{B1,Es1},Nfa1,N1}.
311  
-
312  
-copy(B, Nfa0, N0, D0) when is_integer(B) ->
313  
-    case keysearch(B, 1, D0) of
314  
-	{value,{B,Rep}} -> {Rep,Nfa0, N0, D0};
315  
-	false ->
316  
-	    case keysearch(B, #cstate.id, Nfa0) of
317  
-		{value,#estate{s1=S0,s2=T0}=St} ->
318  
-		    {S1,Nfa1,N1,D1} = copy(S0, Nfa0, N0, D0),
319  
-		    {T1,Nfa2,N2,D2} = copy(T0, Nfa1, N1, D1),
320  
-		    Nfa3 = [St#estate{id=N2,s1=S1,s2=T1}|Nfa2],
321  
-		    {N2,Nfa3,N2+1,[{B,N2}|D2]};
322  
-		{value,St0} ->
323  
-		    %% All other state types have the next state in
324  
-		    %% the same place.
325  
-		    S0 = element(#cstate.s, St0),
326  
-		    {S1,Nfa1,N1,D1} = copy(S0, Nfa0, N0, D0),
327  
-		    St1 = setelement(#cstate.id, St0, N1), %{id=N1,s=S1}
328  
-		    St2 = setelement(#cstate.s, St1, S1),
329  
-		    {N1,[St2|Nfa1],N1+1,[{B,N1}|D1]}
330  
-	    end
331  
-%% 		{value,#cstate{s=S0}=St} ->
332  
-%% 		    {S1,Nfa1,N1,D1} = copy(S0, Nfa0, N0, D0),
333  
-%% 		    Nfa2 = [St#cstate{id=N1,s=S1}|Nfa1],
334  
-%% 		    {N1,Nfa2,N1+1,[{B,N1}|D1]};
335  
-    end;
336  
-copy(B, Nfa, N, D) -> {B,Nfa,N,D}.
337  
-
338  
-%% delete(Frame, Nfa, NextFree) -> {Nfa,NextFree}.
339  
-%%  Delete all the states in a frame if possible.
340  
-%%  This is hairy. Can only delete from the highest element as holes
341  
-%%  not allowed.
342  
-
343  
-delete({B,_}, Nfa, N0) ->
344  
-    Ss0 = span_states(B, Nfa, []),		%All states in this frame
345  
-    Ss1 = reverse(sort(Ss0)),			%Reverse order
346  
-    delete1(Ss1, Nfa, N0).			%Remove until not highest.
347  
-
348  
-delete1([S|Ss], Nfa, N) ->
349  
-    if S == N-1 ->				%Highest id element.
350  
-	    delete1(Ss, keydelete(S, #cstate.id, Nfa), N-1);
351  
-       true -> {Nfa,N}				%No need to go on
352  
-    end;
353  
-delete1([], Nfa, N) -> {Nfa,N}.
354  
-
355  
-span_states(B, Nfa, Seen) when is_integer(B) ->
356  
-    case member(B, Seen) of
357  
-	true -> Seen;
358  
-	false ->
359  
-	    case keysearch(B, #cstate.id, Nfa) of
360  
-		{value,#estate{s1=S,s2=T}} ->
361  
-		    span_states(T, Nfa, span_states(S, Nfa, [B|Seen]));
362  
-		{value,St} ->
363  
-		    %% All other state types have the next state in
364  
-		    %% the same place.
365  
-		    span_states(element(#cstate.s, St), Nfa, [B|Seen])
366  
-	    end
367  
-    end;
368  
-span_states(_, _, Seen) -> Seen.
369  
-
370  
-%% patch(NFA, EndStates, Beginning) -> NFA.
371  
-%%  Patch Endstates so they all point to Beginning.
372  
-
373  
-patch(Nfa, Es, B) ->
374  
-    lists:foldl(fun (E, Nfa0) -> patch1(Nfa0, E, B) end, Nfa, Es).
375  
-
376  
-patch1([#cstate{id=E}=Nst|Nfa], E, B) ->
377  
-    [Nst#cstate{s=B}|Nfa];
378  
-patch1([#nstate{id=E}=Nst|Nfa], E, B) ->
379  
-    [Nst#nstate{s=B}|Nfa];
380  
-%% Patch empty slot of estate, assume there is only 1 empty.
381  
-patch1([#estate{id=E,s1=none}=Nst|Nfa], E, B) ->
382  
-    [Nst#estate{s1=B}|Nfa];
383  
-patch1([#estate{id=E,s2=none}=Nst|Nfa], E, B) ->
384  
-    [Nst#estate{s2=B}|Nfa];
385  
-patch1([#lstate{id=E}=Nst|Nfa], E, B) ->
386  
-    [Nst#lstate{s=B}|Nfa];
387  
-patch1([#rstate{id=E}=Nst|Nfa], E, B) ->
388  
-    [Nst#rstate{s=B}|Nfa];
389  
-patch1([#pstate{id=E}=Nst|Nfa], E, B) ->
390  
-    [Nst#pstate{s=B}|Nfa];
391  
-patch1([Nst|Nfa], E, B) ->
392  
-    [Nst|patch1(Nfa, E, B)].
393  
-
394  
-parse_error(E) -> throw({error,E}).
395  
-
396  
-char($\\, [O1,O2,O3|S]) when
397  
-  O1 >= $0, O1 =< $7, O2 >= $0, O2 =< $7, O3 >= $0, O3 =< $7 ->
398  
-    {(O1*8 + O2)*8 + O3 - 73*$0,S};
399  
-char($\\, [C|S]) -> {escape_char(C),S};
400  
-char($\\, []) -> parse_error({unterminated,"\\"});
401  
-char(C, S) -> {C,S}.
402  
-
403  
-escape_char($n) -> $\n;				%\n = LF
404  
-escape_char($r) -> $\r;				%\r = CR
405  
-escape_char($t) -> $\t;				%\t = TAB
406  
-escape_char($v) -> $\v;				%\v = VT
407  
-escape_char($b) -> $\b;				%\b = BS
408  
-escape_char($f) -> $\f;				%\f = FF
409  
-escape_char($e) -> $\e;				%\e = ESC
410  
-escape_char($s) -> $\s;				%\s = SPACE
411  
-escape_char($d) -> $\d;				%\d = DEL
412  
-escape_char(C) -> C.
413  
-
414  
-char_class([$]|S0]) ->
415  
-    {Cc,S1} = char_class(S0, [$]]),
416  
-    {pack_cc(Cc),S1};
417  
-char_class(S0) ->
418  
-    {Cc,S1} = char_class(S0, []),
419  
-    {pack_cc(Cc),S1}.
420  
-
421  
-comp_class(Cs0) ->
422  
-    {Cc,Cs1} = char_class(Cs0),
423  
-    {comp_class(Cc, 0),Cs1}.
424  
-
425  
-comp_class([{C1,C2}|Crs], Last) ->
426  
-    [{Last,C1-1}|comp_class(Crs, C2+1)];
427  
-comp_class([C|Crs], Last) when Last == C-1 ->
428  
-    [Last|comp_class(Crs, C+1)];
429  
-comp_class([C|Crs], Last) when is_integer(C) ->
430  
-    [{Last,C-1}|comp_class(Crs, C+1)];
431  
-comp_class([], Last) -> [{Last,maxchar}].
432  
-
433  
-%% pack_cc(CharClass) -> CharClass
434  
-%%  Pack and optimise a character class specification (bracket
435  
-%%  expression). First sort it and then compact it.
436  
-
437  
-pack_cc(Cc0) ->
438  
-    %% First sort the list ...
439  
-    Cc1 = lists:usort(fun ({Cf1,_}, {Cf2,_}) -> Cf1 < Cf2;
440  
-			  ({Cf1,_}, C) -> Cf1 < C;
441  
-			  (C, {Cf,_}) -> C < Cf;
442  
-			  (C1, C2) -> C1 =< C2
443  
-		      end, Cc0),
444  
-    %% ... then compact it.
445  
-    pack_cc1(Cc1).
446  
-
447  
-pack_cc1([{Cf1,Cl1},{Cf2,Cl2}|Cc]) when Cl1 >= Cf2, Cl1 =< Cl2 ->
448  
-    %% Cf1       Cl1
449  
-    %%     Cf2       Cl2
450  
-    pack_cc1([{Cf1,Cl2}|Cc]);
451  
-pack_cc1([{Cf1,Cl1},{Cf2,Cl2}|Cc]) when Cl1 >= Cf2, Cl1 >= Cl2 ->
452  
-    %% Cf1       Cl1
453  
-    %%     Cf2 Cl2
454  
-    pack_cc1([{Cf1,Cl1}|Cc]);
455  
-pack_cc1([{Cf1,Cl1},{Cf2,Cl2}|Cc]) when Cl1+1 == Cf2 ->
456  
-    %% Cf1    Cl1
457  
-    %%           Cf2   Cl2
458  
-    pack_cc1([{Cf1,Cl2}|Cc]);
459  
-pack_cc1([{Cf,Cl},C|Cc]) when Cl >= C -> pack_cc1([{Cf,Cl}|Cc]);
460  
-pack_cc1([{Cf,Cl},C|Cc]) when Cl+1 == C -> pack_cc1([{Cf,C}|Cc]);
461  
-pack_cc1([C,{Cf,Cl}|Cc]) when C == Cf-1 -> pack_cc1([{C,Cl}|Cc]);
462  
-pack_cc1([C1,C2|Cc]) when C1+1 == C2 -> pack_cc1([{C1,C2}|Cc]);
463  
-pack_cc1([C|Cc]) -> [C|pack_cc1(Cc)];
464  
-pack_cc1([]) -> [].
465  
-
466  
-char_class("[:" ++ S0, Cc0) ->			%Start of POSIX char class
467  
-    case posix_cc(S0, Cc0) of
468  
-	{Cc1,":]" ++ S1} -> char_class(S1, Cc1);
469  
-	{_,_S1} -> parse_error({posix_cc,"[:" ++ S0})
470  
-    end;
471  
-char_class([C1|S0], Cc) when C1 /= $] ->
472  
-    case char(C1, S0) of
473  
-	{Cf,[$-,C2|S1]} when C2 /= $] ->
474  
-	    case char(C2, S1) of
475  
-		{Cl,S2} when Cf < Cl -> char_class(S2, [{Cf,Cl}|Cc]); 
476  
-		{_Cl,_S2} -> parse_error({char_class,[C1|S0]})
477  
-	    end;
478  
-	{C,S1} -> char_class(S1, [C|Cc])
479  
-    end;
480  
-char_class(S, Cc) -> {Cc,S}.
481  
-
482  
-%% posix_cc(String, CharClass) -> {NewCharClass,RestString}.
483  
-%%  Handle POSIX character classes, use Latin-1 character set.
484  
-
485  
-posix_cc("alnum" ++ S, Cc) ->
486  
-    {[{$0,$9},{$A,$Z},{192,214},{216,223},{$a,$z},{224,246},{248,255}|Cc],S};
487  
-posix_cc("alpha" ++ S, Cc) ->
488  
-    {[{$A,$Z},{192,214},{216,223},{$a,$z},{224,246},{248,255}|Cc],S};
489  
-posix_cc("blank" ++ S, Cc) -> {[$\s,$\t,160|Cc],S};
490  
-posix_cc("cntrl" ++ S, Cc) -> {[{0,31},{127,159}|Cc],S};
491  
-posix_cc("digit" ++ S, Cc) -> {[{$0,$9}|Cc],S};
492  
-posix_cc("graph" ++ S, Cc) -> {[{33,126},{161,255}|Cc],S};
493  
-posix_cc("lower" ++ S, Cc) -> {[{$a,$z},{224,246},{248,255}|Cc],S};
494  
-posix_cc("print" ++ S, Cc) -> {[{32,126},{160,255}|Cc],S};
495  
-posix_cc("punct" ++ S, Cc) -> {[{$!,$/},{$:,$?},{${,$~},{161,191}|Cc],S};
496  
-posix_cc("space" ++ S, Cc) -> {[$\s,$\t,$\f,$\r,$\v,160|Cc],S};
497  
-posix_cc("upper" ++ S, Cc) -> {[{$A,$Z},{192,214},{216,223}|Cc],S};
498  
-posix_cc("xdigit" ++ S, Cc) -> {[{$a,$f},{$A,$F},{$0,$9}|Cc],S};
499  
-posix_cc(S, _Cc) -> parse_error({posix_cc,"[:" ++ S}).
500  
-
501  
-interval_range(Cs0) ->
502  
-    case number(Cs0) of
503  
-	{none,Cs1} -> {none,none,Cs1};
504  
-	{N,[$,|Cs1]} ->
505  
-	    case number(Cs1) of
506  
-		{none,Cs2} -> {N,any,Cs2};
507  
-		{M,Cs2} -> {N,M,Cs2}
508  
-	    end;
509  
-	{N,Cs1} -> {N,none,Cs1}
510  
-    end.
511  
-
512  
-number([C|Cs]) when C >= $0, C =< $9 ->
513  
-    number(Cs, C - $0);
514  
-number(Cs) -> {none,Cs}.
515  
-
516  
-number([C|Cs], Acc) when C >= $0, C =< $9 ->
517  
-    number(Cs, 10*Acc + (C - $0));
518  
-number(Cs, Acc) -> {Acc,Cs}.
519  
-
520  
-%% The interface functions.
521  
-
522  
-parse(Cs) ->
523  
-    case catch reg(Cs) of
524  
-	{ok,R,[]} -> {ok,{nfa,R}};
525  
-	{ok,_R,[C|_]} -> {error,{illegal,[C]}};
526  
-	{error,E} -> {error,E}
527  
-    end.
528  
-
529  
-%% match(String, RegExp) -> {match,Start,Length} | nomatch | {error,E}.
530  
-%%  Find the longest match of RegExp in String.
531  
-
532  
-match(S, RegExp) when is_list(RegExp) ->
533  
-    case parse(RegExp) of
534  
-	{ok,RE} -> match(S, RE);
535  
-	{error,E} -> {error,E}
536  
-    end;
537  
-match(S, {nfa,NFA}) when is_binary(S) ->
538  
-    case match_bin(S, 1, NFA, 0, -1) of
539  
-	{Start,Len} when Len >= 0 -> {match,Start,Len};
540  
-	{_,_} -> nomatch
541  
-    end;
542  
-match(S, {nfa,NFA}) ->
543  
-    case match_str(S, 1, NFA, 0, -1) of
544  
-	{Start,Len} when Len >= 0 -> {match,Start,Len};
545  
-	{_,_} -> nomatch
546  
-    end.
547  
-
548  
-match_str(Cs0, P, Nfa, Mst, Mlen) ->
549  
-    case next_match_str(Cs0, P, Nfa) of
550  
-	{match,St,Len,[_|Cs],_} ->
551  
-	    if Len > Mlen -> match_str(Cs, St+1, Nfa, St, Len);
552  
-	       true -> match_str(Cs, St+1, Nfa, Mst, Mlen)
553  
-	    end;
554  
-	{match,St,Len,[],_} ->			%Empty match at end
555  
-	    if Len > Mlen -> {St,Len};
556  
-	       true -> {Mst,Mlen}
557  
-	    end;
558  
-	nomatch -> {Mst,Mlen}
559  
-    end.
560  
-
561  
-match_bin(Bin, P, Nfa, Mst, Mlen) ->
562  
-    case next_match_bin(Bin, P, Nfa) of
563  
-	{match,St,Len} when St+Len == size(Bin) -> %Empty match at end
564  
-	    if Len > Mlen -> {St,Len};
565  
-	       true -> {Mst,Mlen}
566  
-	    end;
567  
-	{match,St,Len} ->
568  
-	    if Len > Mlen -> match_bin(Bin, St+1, Nfa, St, Len);
569  
-	       true -> match_bin(Bin, St+1, Nfa, Mst, Mlen)
570  
-	    end;
571  
-	nomatch -> {Mst,Mlen}
572  
-    end.
573  
-
574  
-%% match1(String, RegExp) -> {match,Start,Length} | nomatch | {error,E}.
575  
-%% first_match(String, RegExp) -> {match,Start,Length} | nomatch | {error,E}.
576  
-%%  Find the first match of RegExp in String, return Start and Length.
577  
-
578  
-first_match(S, RegExp) when is_list(RegExp) ->
579  
-    {ok,RE} = parse(RegExp),
580  
-    first_match(S, RE);
581  
-first_match(S, {nfa,RE}) when is_binary(S) ->
582  
-    first_match_bin(S, 1, RE);
583  
-first_match(S, {nfa,RE}) ->
584  
-    first_match_str(S, 1, RE).
585  
-
586  
-first_match_str(Cs, P, Nfa) ->
587  
-    case next_match_str(Cs, P, Nfa) of
588  
-	{match,St,Len,_,_} -> {match,St,Len};
589  
-	nomatch -> nomatch
590  
-    end.
591  
-
592  
-first_match_bin(Bin, P0, Nfa) ->
593  
-    case next_match_bin(Bin, P0, Nfa) of
594  
-	{match,St,Len} -> {match,St,Len};
595  
-	nomatch -> nomatch
596  
-    end.
597  
-
598  
-%% smatch(String, RegExp) ->
599  
-%%      {match,Start,Length,String,SubExprs} | nomatch | {error,E}.
600  
-%%  Find the longest match of RegExp in String.
601  
-
602  
-smatch(S, RegExp) when is_list(RegExp) ->
603  
-    case parse(RegExp) of
604  
-	{ok,RE} -> smatch(S, RE);
605  
-	{error,E} -> {error,E}
606  
-    end;
607  
-smatch(S, {nfa,Nfa}) when is_binary(S) ->
608  
-    case smatch_bin(S, 1, Nfa, {0,-1,none}) of
609  
-	{St,Len,Subs} when Len >= 0 ->
610  
-	    {match,St,Len,bin_to_list(S, St, Len),fix_subs_bin(Subs, S)};
611  
-	{_,_,_} -> nomatch
612  
-    end;
613  
-smatch(S, {nfa,Nfa}) ->
614  
-    case smatch_str(S, 1, Nfa, {0,-1,[],none}) of
615  
-	{St,Len,Cs,Subs} when Len >= 0 ->
616  
-	    {match,St,Len,substr(Cs, 1, Len),fix_subs_str(Subs, St, Cs)};
617  
-	{_,_,_,_} -> nomatch
618  
-    end.
619  
-
620  
-smatch_str(Cs0, P, Nfa, {_,Mlen,_,_}=M) ->
621  
-    case next_smatch_str(Cs0, P, Nfa) of
622  
-	{match,St,Len,[_|Cs]=Cs1,Subs,_} ->		%Found a match
623  
-	    if Len > Mlen -> smatch_str(Cs, St+1, Nfa, {St,Len,Cs1,Subs});
624  
-	       true -> smatch_str(Cs, St+1, Nfa, M)
625  
-	    end;
626  
-	{match,St,Len,[],Subs,_} ->
627  
-	    if Len > Mlen -> {St,Len,[],Subs};
628  
-	       true -> M
629  
-	    end;
630  
-	nomatch -> M
631  
-    end.
632  
-
633  
-smatch_bin(Bin, P, Nfa, {_,Mlen,_}=M) ->
634  
-    case next_smatch_bin(Bin, P, Nfa) of
635  
-	{match,St,Len,Subs} when St+Len == size(Bin) ->
636  
-	    if Len > Mlen -> {St,Len,Subs};
637  
-	       true -> M
638  
-	    end;
639  
-	{match,St,Len,Subs} ->
640  
-	    if Len > Mlen -> smatch_bin(Bin, St+1, Nfa, {St,Len,Subs});
641  
-	       true -> smatch_bin(Bin, St+1, Nfa, M)
642  
-	    end;
643  
-	nomatch -> M
644  
-    end.
645  
-
646  
-%% first_smatch(String, RegExp) ->
647  
-%%       {match,Start,Length,SubExprs} | nomatch | {error,E}.
648  
-%%  Find the first match of RegExp in String, return Start and Length
649  
-%%  as well as tuple of sub-expression matches.
650  
-
651  
-first_smatch(S, RegExp) when is_list(RegExp) ->
652  
-    {ok,RE} = parse(RegExp),
653  
-    first_smatch(S, RE);
654  
-first_smatch(S, {nfa,RE}) when is_binary(S) ->
655  
-    first_smatch_bin(S, 1, RE);
656  
-first_smatch(S, {nfa,RE}) ->
657  
-    first_smatch_str(S, 1, RE).
658  
-
659  
-first_smatch_str(Cs, P, Nfa) ->
660  
-    case next_smatch_str(Cs, P, Nfa) of
661  
-	{match,St,Len,_,Subs,_} -> {match,St,Len,fix_subs_str(Subs,1,Cs)};
662  
-	nomatch -> nomatch
663  
-    end.
664  
-
665  
-first_smatch_bin(Bin, P, Nfa) ->
666  
-    case next_smatch_bin(Bin, P, Nfa) of
667  
-	{match,St,Len,Subs} -> {match,St,Len,fix_subs_bin(Subs, Bin)};
668  
-	nomatch -> nomatch
669  
-    end.
670  
-
671  
-%% matches(String, RegExp) -> {match,[{Start,Length}]} | {error,E}.
672  
-%%  Return the all the non-overlapping matches of RegExp in String.
673  
-
674  
-matches(S, RegExp) when is_list(RegExp) ->
675  
-    case parse(RegExp) of
676  
-	{ok,RE} -> matches(S, RE);
677  
-	{error,E} -> {error,E}
678  
-    end;
679  
-matches(S, {nfa,NFA}) when is_binary(S) ->
680  
-    {match,matches_bin(S, 1, NFA)};
681  
-matches(S, {nfa,NFA}) ->
682  
-    {match,matches_str(S, 1, NFA)}.
683  
-
684  
-matches_str(Cs0, P0, Nfa) ->
685  
-    case next_match_str(Cs0, P0, Nfa) of
686  
-	{match,St,0,_,[_|Cs1]} ->
687  
-	    [{St,0}|matches_str(Cs1, St+1, Nfa)];
688  
-	{match,St,0,_,[]} -> [{St,0}];
689  
-	{match,St,Len,_,Cs1} ->
690  
-	    [{St,Len}|matches_str(Cs1, St+Len, Nfa)];
691  
-	nomatch -> []
692  
-    end.
693  
-
694  
-matches_bin(Bin, P0, Nfa) ->
695  
-    case next_match_bin(Bin, P0, Nfa) of
696  
-	{match,St,0} when St =< size(Bin) ->
697  
-	    [{St,0}|matches_bin(Bin, St+1, Nfa)];
698  
-	{match,St,0} -> [{St,0}];
699  
-	{match,St,Len} ->
700  
-	    [{St,Len}|matches_bin(Bin, St+Len, Nfa)];
701  
-	nomatch -> []
702  
-    end.
703  
-
704  
-%% sub(String, RegExp, Replace) -> {ok,RepString,RepCount} | {error,E}.
705  
-%%  Substitute the first match of the regular expression RegExp with
706  
-%%  the string Replace in String. Accept pre-parsed regular
707  
-%%  expressions.
708  
-
709  
-sub(S, RegExp, Rep) when is_list(RegExp) ->
710  
-    case parse(RegExp) of
711  
-	{ok,RE} -> sub(S, RE, Rep);
712  
-	{error,E} -> {error,E}
713  
-    end;
714  
-sub(S, {nfa,Nfa}, Rep) when is_binary(S) ->
715  
-    case sub_bin(S, 1, Nfa, Rep) of
716  
-	{yes,NewBin} -> {ok,list_to_binary(NewBin),1};
717  
-	no -> {ok,S,0}
718  
-    end;
719  
-sub(S, {nfa,Nfa}, Rep) ->
720  
-    case sub_str(S, 1, Nfa, Rep) of
721  
-	{yes,NewStr} -> {ok,NewStr,1};
722  
-	no -> {ok,S,0}
723  
-    end.
724  
-
725  
-%% sub_str(String, Position, NFA, Replacement) ->
726  
-%%      {yes,NewString} | no.
727  
-%% sub_bin(String, Position, NFA, Replacement) ->
728  
-%%      {yes,NewString} | no.
729  
-%% Step forward over String until a match is found saving stepped over
730  
-%% chars in Before. Return reversed Before prepended to replacement
731  
-%% and rest of string.
732  
-
733  
-sub_str(Cs0, P, Nfa, Rep) ->
734  
-    case next_match_str(Cs0, P, Nfa) of
735  
-	{match,St,Len,Cs,Cs1} ->
736  
-	    {yes,substr_app(St-P, Cs0,
737  
-			    sub_repl(Rep, substr(Cs, 1, Len), Cs1))};
738  
-	nomatch -> no
739  
-    end.
740  
-
741  
-substr_app(0, _, App) -> App;
742  
-substr_app(N, [C|Cs], App) ->
743  
-    [C|substr_app(N-1, Cs, App)];
744  
-substr_app(_, [], App) -> App.
745  
-
746  
-sub_bin(Bin, P, Nfa, Rep) ->
747  
-    case next_match_bin(Bin, P, Nfa) of
748  
-	{match,St,Len} ->
749  
-	    {yes,[sub_bin(Bin, P, St - P),
750  
-		  sub_repl(Rep, binary_to_list(Bin, St, St+Len-1),
751  
-			   sub_bin(Bin, St+Len))]};
752  
-	nomatch -> no
753  
-    end.
754  
-
755  
-sub_repl([$&|Rep], M, Rest) -> M ++ sub_repl(Rep, M, Rest);
756  
-sub_repl([$\\,$&|Rep], M, Rest) -> [$&|sub_repl(Rep, M, Rest)];
757  
-sub_repl([C|Rep], M, Rest) -> [C|sub_repl(Rep, M, Rest)];
758  
-sub_repl([], _M, Rest) -> Rest.
759  
-
760  
-%%  gsub(String, RegExp, Replace) -> {ok,RepString,RepCount} | {error,E}.
761  
-%%  Substitute every match of the regular expression RegExp with the
762  
-%%  string New in String. Accept pre-parsed regular expressions.
763  
-
764  
-gsub(S, RegExp, Rep) when is_list(RegExp) ->
765  
-    case parse(RegExp) of
766  
-	{ok,RE} -> gsub(S, RE, Rep);
767  
-	{error,E} -> {error,E}
768  
-    end;
769  
-gsub(S, {nfa,Nfa}, Rep) when is_binary(S) ->
770  
-    case gsub_bin(S, 1, Nfa, Rep) of
771  
-	{NewStr,N} -> {ok,list_to_binary(NewStr),N};
772  
-	no -> {ok,S,0}				%No substitutions
773  
-    end;
774  
-gsub(S, {nfa,Nfa}, Rep) ->
775  
-    case gsub_str(S, 1, Nfa, Rep) of
776  
-	{NewStr,N} -> {ok,NewStr,N};
777  
-	no -> {ok,S,0}				%No substitutions
778  
-    end.
779  
-
780  
-%% gsub_str(String, Position, NFA, Replacement) ->
781  
-%%      {NewString,Count} | no.
782  
-%% Step forward over String until a match is found saving stepped over
783  
-%% chars in Before. Call recursively to do rest of string after
784  
-%% match. Return reversed Before prepended to return from recursive
785  
-%% call.
786  
-
787  
-gsub_str(Cs0, P, Nfa, Rep) ->
788  
-    case next_match_str(Cs0, P, Nfa) of
789  
-	{match,St,0,_,[C|Cs1]} ->
790  
-	    {New,N} = gsub_str(Cs1, St+1, Nfa, Rep),
791  
-	    {substr_app(St-P, Cs0, sub_repl(Rep, [], [C|New])),N+1};
792  
-	{match,_,0,_,[]} -> {sub_repl(Rep, [], []),1};
793  
-	{match,St,Len,Cs,Cs1} ->
794  
-	    {New,N} = gsub_str(Cs1, St+Len, Nfa, Rep),
795  
-	    {substr_app(St-P, Cs0,
796  
-			sub_repl(Rep, substr(Cs, 1, Len), New)),N+1};
797  
-	nomatch -> {Cs0,0}
798  
-    end.
799  
-
800  
-gsub_bin(Bin, P, Nfa, Rep) ->
801  
-    case next_match_bin(Bin, P, Nfa) of
802  
-	{match,St,0} when St =< size(Bin) ->
803  
-	    {New,N} = gsub_bin(Bin, St+1, Nfa, Rep),
804  
-	    New1 = binary_to_list(Bin, St, St) ++ New,
805  
-	    {[sub_bin(Bin, P, St - P), sub_repl(Rep, [], New1)],N+1};
806  
-	{match,_,0} -> {sub_repl(Rep, [], []),1};
807  
-	{match,St,Len} ->
808  
-	    {New,N} = gsub_bin(Bin, St+Len, Nfa, Rep),
809  
-	    {[sub_bin(Bin, P, St - P),
810  
-	      sub_repl(Rep, binary_to_list(Bin, St, St+Len-1), New)], N+1};
811  
-	nomatch -> {sub_bin(Bin, P),0}
812  
-    end.
813  
-
814  
-%% split(String, RegExp) -> {ok,[SubString]} | {error,E}.
815  
-%%  Split a string into substrings where the RegExp describes the
816  
-%%  field seperator. The RegExp " " is specially treated.
817  
-
818  
-split(S, " ") -> split(S, "[ \t]+", true);	%This is really special!
819  
-split(S, Regexp) -> split(S, Regexp, false).
820  
-
821  
-split(S, Regexp, Trim) when is_list(Regexp) ->
822  
-    case parse(Regexp) of
823  
-	{ok,RE} -> split(S, RE, Trim);
824  
-	{error,E} -> {error,E}
825  
-    end;
826  
-split(S, {nfa,Nfa}, Trim) when is_binary(S) ->
827  
-    case split_bin(S, 1, Nfa, Trim) of
828  
-	[[]|Ss] when Trim -> {ok,Ss};
829  
-	Ss -> {ok,Ss}
830  
-    end;
831  
-split(S, {nfa,Nfa}, Trim) ->
832  
-    case split_str(S, 1, Nfa, Trim) of
833  
-	[[]|Ss] when Trim -> {ok,Ss};
834  
-	Ss -> {ok,Ss}
835  
-    end.
836  
-
837  
-split_str(Cs0, P, Nfa, Trim) ->
838  
-    case next_match_str(Cs0, P, Nfa) of
839  
-	{match,St,0,_,[C|Cs1]} ->
840  
-	    Ss1 = case split_str(Cs1, St+1, Nfa, Trim) of
841  
-		      [S1|Ss] -> [[C|S1]|Ss];
842  
-		      [] -> [[C]]
843  
-		  end,
844  
-	    [substr(Cs0, 1, St-P)|Ss1];
845  
-	{match,St,0,_,[]} -> [substr(Cs0, 1, St-P)];
846  
-	{match,St,Len,_,Cs1} ->
847  
-	    [substr(Cs0, 1, St-P)|split_str(Cs1, St+Len, Nfa, Trim)];
848  
-	nomatch ->
849  
-	    if Trim, Cs0 == [] -> [];
850  
-	       true -> [Cs0]
851  
-	    end
852  
-    end.
853  
-
854  
-split_bin(Bin, P, Nfa, Trim) ->
855  
-    case next_match_bin(Bin, P, Nfa) of
856  
-	{match,St,0} when St =< size(Bin) ->
857  
-	    C = bin_to_list(Bin, St, 1),
858  
-	    Ss1 = case split_bin(Bin, St+1, Nfa, Trim) of
859  
-		      [S1|Ss] -> [list_to_binary([C|S1])|Ss];
860  
-		      [] -> [C]
861  
-		  end,
862  
-	    [sub_bin(Bin, P, St-P)|Ss1];
863  
-	{match,St,0} -> [sub_bin(Bin, P, St-P)];
864  
-	{match,St,Len} ->
865  
-	    [sub_bin(Bin, P, St-P)|split_bin(Bin, St+Len, Nfa, Trim)];
866  
-	nomatch ->
867  
-	    if Trim, P > size(Bin) -> [];
868  
-	       P > size(Bin) -> [<<>>];
869  
-	       true -> [sub_bin(Bin, P)]
870  
-	    end
871  
-    end.
872  
-
873  
-fix_subs_str(Subs, St, S) ->
874  
-    Subsl = fix_subs_str(Subs, St, S, size(Subs), []),
875  
-    list_to_tuple(Subsl).
876  
-
877  
-fix_subs_str(_, _, _, 0, Ss) -> Ss;
878  
-fix_subs_str(Subs, P, S, N, Ss) ->
879  
-     E = case element(N, Subs) of
880  
-	     {St,L} -> {-St,L,substr(S, -St-P+1, L)};
881  
-	     undefined -> undefined
882  
-	 end,
883  
-    fix_subs_str(Subs, P, S, N-1, [E|Ss]).
884  
-
885  
-fix_subs_bin(Subs, Bin) ->
886  
-    Subsl = fix_subs_bin(Subs, Bin, size(Subs), []),
887  
-    list_to_tuple(Subsl).
888  
-
889  
-fix_subs_bin(_, _, 0, Ss) -> Ss;
890  
-fix_subs_bin(Subs, Bin, N, Ss) ->
891  
-     E = case element(N, Subs) of
892  
-	     {St,L} -> {-St,L,bin_to_list(Bin, -St, L)};
893  
-	     undefined -> undefined
894  
-	 end,
895  
-    fix_subs_bin(Subs, Bin, N-1, [E|Ss]).
896  
-
897  
-%% bin_to_list(Binary, Start) -> Chars.
898  
-%% bin_to_list(Binary, Start, Length) -> Chars.
899  
-%%  As it should be!
900  
-
901  
-% bin_to_list(Bin, St) -> binary_to_list(Bin, St, size(Bin)).
902  
-
903  
-bin_to_list(_, _, 0) -> [];
904  
-bin_to_list(Bin, St, L) -> binary_to_list(Bin, St, St+L-1).
905  
-
906  
-sub_bin(Bin, St) ->
907  
-    St1 = St - 1,
908  
-    <<_:St1/binary,Sub/binary>> = Bin,
909  
-    Sub.
910  
-
911  
-sub_bin(Bin, St, Len) ->
912  
-    St1 = St - 1,
913  
-    <<_:St1/binary,Sub:Len/binary,_/binary>> = Bin,
914  
-    Sub.
915  
-
916  
-%% The NFA engines.
917  
-%%  We have two separate engines depending on whether we want to
918  
-%%  capture sub-expressions. Both have a top-level driver for strings
919  
-%%  and binaries. We need to do one character lookahead to get correct
920  
-%%  end of string behaviour as we match both [] and [$\n]. This is a
921  
-%%  pain!
922  
-
923  
-%% next_match_str(String, StartPos, NFA) ->
924  
-%%      {match,Start,Length,Chars,RestChars} | nomatch.
925  
-%%  Find the next match in String. Try successive positions until
926  
-%%  either a match is found or we reach the end of the string.
927  
-
928  
-next_match_str(Cs, P, {Nfa,Start,_}) ->
929  
-    next_match_str(Cs, P, Nfa, eclosure(Start, Nfa, [], [])).
930  
-
931  
-next_match_str([_|Cs1]=Cs0, P0, Nfa, Ss) ->
932  
-    case nfa_str(Cs0, P0, Nfa, Ss, nomatch) of
933  
-	{match,P1,Cs} -> {match,P0,P1-P0,Cs0,Cs};
934  
-	nomatch -> next_match_str(Cs1, P0+1, Nfa, Ss)
935  
-    end;
936  
-next_match_str([], P0, Nfa, Ss) ->
937  
-    case nfa_str([], P0, Nfa, Ss, nomatch) of	%Try for null match at end.
938  
-	{match,P1,Cs} -> {match,P0,P1-P0,[],Cs};
939  
-	nomatch -> nomatch
940  
-    end.
941  
-
942  
-%% nfa_str(Chars, Pos, NFA, States, Accept) -> {match,NextPos,Rest} | nomatch.
943  
-%%  Run the NFA machine over binary starting at one position until we
944  
-%%  either have a match or not a match.
945  
-
946  
-nfa_str(_, _, _, [], A) -> A;			%No matching states
947  
-nfa_str([C|[C1|_]=Cs1]=Cs0, P, Nfa, Ss0, A) ->
948  
-    Gl = {P,C,C1},
949  
-    case step(C, Gl, Nfa, Ss0, [], false) of
950  
-	{Ss1,true} ->
951  
-	    nfa_str(Cs1, P+1, Nfa, Ss1, {match,P,Cs0});
952  
-	{Ss1,false} ->
953  
-	    nfa_str(Cs1, P+1, Nfa, Ss1, A)
954  
-    end;
955  
-nfa_str([C]=Cs0, P, Nfa, Ss0, A) ->
956  
-    Gl = {P,C,eos},
957  
-    case step(C, Gl, Nfa, Ss0, [], false) of
958  
-	{Ss1,true} ->
959  
-	    nfa_str([], P+1, Nfa, Ss1, {match,P,Cs0});
960  
-	{Ss1,false} ->
961  
-	    nfa_str([], P+1, Nfa, Ss1, A)
962  
-    end;
963  
-nfa_str([], P, Nfa, Ss, A) ->			%No more characters
964  
-    case has_match(P, Nfa, Ss) of
965  
-	yes -> {match,P,[]};
966  
-	no -> A					%Take what we got
967  
-    end.
968  
-
969  
-%% next_match_bin(Binary, StartPos, NFA) ->
970  
-%%      {match,Start,Length} | nomatch.
971  
-%%  Find the next match in Binary. Try successive positions until
972  
-%%  either a match is found or we reach the end of the string.
973  
-
974  
-next_match_bin(Bin, P, {Nfa,Start,_}) ->
975  
-    next_match_bin(Bin, P, Nfa, eclosure(Start, Nfa, [], [])).
976  
-
977  
-next_match_bin(Bin, P0, Nfa, Ss) when P0 < size(Bin) ->
978  
-    case nfa_bin(Bin, P0, Nfa, Ss, nomatch) of
979  
-	{match,P1} -> {match,P0,P1-P0};
980  
-	nomatch -> next_match_bin(Bin, P0+1, Nfa, Ss)
981  
-    end;
982  
-next_match_bin(Bin, P0, Nfa, Ss) ->
983  
-    case nfa_bin(Bin, P0, Nfa, Ss, nomatch) of	%Try for null match at end.
984  
-	{match,P1} -> {match,P0,P1-P0};
985  
-	nomatch -> nomatch
986  
-    end.
987  
-
988  
-%% nfa_bin(Binary, Pos, NFA, States, Accept) -> {match,NextPos} | nomatch.
989  
-%%  Run the NFA machine over binary starting at one position until we
990  
-%%  either have a match or not a match.
991  
-
992  
-nfa_bin(_, _, _, [], A) -> A;			%No matching states
993  
-nfa_bin(Bin, P, Nfa, Ss0, A) ->
994  
-    P1 = P-1,					%Number of chars before
995  
-    case Bin of
996  
-	<<_:P1/binary,C,C1,_/binary>> ->
997  
-	    Gl = {P,C,C1},
998  
-	    case step(C, Gl, Nfa, Ss0, [], false) of
999  
-		{Ss1,true} ->
1000  
-		    nfa_bin(Bin, P+1, Nfa, Ss1, {match,P});
1001  
-		{Ss1,false} ->
1002  
-		    nfa_bin(Bin, P+1, Nfa, Ss1, A)
1003  
-	    end;
1004  
-	<<_:P1/binary,C,_/binary>> ->
1005  
-	    Gl = {P,C,eos},
1006  
-	    case step(C, Gl, Nfa, Ss0, [], false) of
1007  
-		{Ss1,true} ->
1008  
-		    nfa_bin(Bin, P+1, Nfa, Ss1, {match,P});
1009  
-		{Ss1,false} ->
1010  
-		    nfa_bin(Bin, P+1, Nfa, Ss1, A)
1011  
-	    end;
1012  
-	_ ->					%No more characters.
1013  
-	    case has_match(P, Nfa, Ss0) of
1014  
-		yes -> {match,P};
1015  
-		no -> A				%Take what we got
1016  
-	    end
1017  
-    end.
1018  
-
1019  
-%% step(Char, GlobalState, NFA, States, NewStates, Done) -> {NewStates,Done}.
1020  
-%%  Pos is the position of the current character.
1021  
-
1022  
-step(C, Gl, Nfa, [S|Ss], News, D) ->
1023  
-    case element(S, Nfa) of
1024  
-	#cstate{c=C,s=N} ->
1025  
-	    step(C, Gl, Nfa, Ss, eclosure(N, Nfa, [], News), D);
1026  
-	#cstate{c=done} -> step(C, Gl, Nfa, Ss, News, true);
1027  
-	#cstate{} -> step(C, Gl, Nfa, Ss, News, D);
1028  
-	#nstate{cc=Cc,s=N} ->
1029  
-	    case match_char(C, Cc) of
1030  
-		true ->
1031  
-		    step(C, Gl, Nfa, Ss, eclosure(N, Nfa, [], News), D);
1032  
-		false -> step(C, Gl, Nfa, Ss, News, D)
1033  
-	    end;
1034  
-	#pstate{t=bos,s=N} ->
1035  
-	    if element(1, Gl) == 1 ->
1036  
-		    %% Add eclosure to *this* level of states
1037  
-		    Ss1 = eclosure(N, Nfa, [], Ss),
1038  
-		    step(C, Gl, Nfa, Ss1, News, D);
1039  
-	       true -> step(C, Gl, Nfa, Ss, News, D)
1040  
-	    end;
1041  
-	#pstate{t=eos,s=N} ->
1042  
-	    Ss1 = if element(2, Gl) == $\n, element(3, Gl) == eos ->
1043  
-			  %% Add eclosure to *this* level of states
1044  
-			  eclosure(N, Nfa, [], Ss);
1045  
-		     true -> Ss
1046  
-		  end,
1047  
-	    step(C, Gl, Nfa, Ss1, News, D)
1048  
-    end;
1049  
-step(_, _, _, [], News, D) -> {News,D}.
1050  
-
1051  
-%% eclosure(State, Nfa, SeenStates, NewStates) -> NewStates.
1052  
-
1053  
-eclosure(S, Nfa, Es, Rest) ->
1054  
-    case element(S, Nfa) of
1055  
-	#estate{s1=S1,s2=S2} ->
1056  
-	    %% Must track of where we have been to avoid loops.
1057  
-	    case member(S, Es) of
1058  
-		true -> Rest;
1059  
-		false ->
1060  
-		    Es1 = [S|Es],
1061  
-		    eclosure(S1, Nfa, Es1, eclosure(S2, Nfa, Es1, Rest))
1062  
-	    end;
1063  
-	%% Just ignore parentheses states here.
1064  
-	#lstate{s=S1} -> eclosure(S1, Nfa, Es, Rest);
1065  
-	#rstate{s=S1} -> eclosure(S1, Nfa, Es, Rest);
1066  
-	%% All other states get added to state list.
1067  
-	_St -> add_state(S, Rest, Rest)
1068  
-    end.
1069  
-
1070  
-%% add_state(State, States, States) -> States.
1071  
-%% Add a state to list of states. As list generally short it is better
1072  
-%% to carry it around in extra argument and prepend new to beginning
1073  
-%% rather than rebuilding every call.
1074  
-
1075  
-add_state(S, [S|_Ss], All) -> All;
1076  
-add_state(S, [_|Ss], All) -> add_state(S, Ss, All);
1077  
-add_state(S, [], All) -> [S|All].
1078  
-
1079  
-%% match_char(Char, Class) -> bool().
1080  
-
1081  
-match_char(C, [{C1,C2}|_Cc]) when C >= C1, C =< C2 -> true;
1082  
-match_char(C, [C|_Cc]) -> true;
1083  
-match_char(C, [_|Cc]) -> match_char(C, Cc);
1084  
-match_char(_, []) -> false.
1085  
-
1086  
-has_match(P, Nfa, [S|Ss]) ->
1087