Jacob Vorreuter committed May 1, 2009
1 parent c7834cc commit eb12b70
Showing 13 changed files with 1,547 additions and 24 deletions.
1 change: 1 addition & 0 deletions Makefile
Expand Up @@ -2,6 +2,7 @@ all:
mkdir -p ebin/
#(cd templates;$(MAKE))
(cd src;$(MAKE))
(cd t;$(MAKE))

test: all
prove -v t/*.t
Empty file removed public/gracenote.html
Empty file.
405 changes: 405 additions & 0 deletions public/gracenote_album_2021ab38530960de.html

Large diffs are not rendered by default.

453 changes: 453 additions & 0 deletions public/gracenote_album_b917d0542e3ab9a7.html

Large diffs are not rendered by default.

480 changes: 480 additions & 0 deletions public/gracenote_albums.html

Large diffs are not rendered by default.

87 changes: 67 additions & 20 deletions src/ex_consumer.erl
@@ -1,10 +1,11 @@
-export([execute/2, fetch/3, assign/3, assert/3, commit/3, each/4, configure/3]).
-export([execute/2, fetch/3, assign/3, assert/3, commit/3, commit/4, each/4, configure/3, function/2, print/2, onfail/3]).


%% @spec execute(instr(), State) -> State1
%% @doc Run a single instruction against State.
%% The instruction's function name is resolved with apply/3 on this module
%% and is called with State prepended to the instruction's own arguments.
execute({instr, Function, Args}, State) ->
    %% the log line shows the atom `state` in place of the real state term
    LoggedArgs = [state|Args],
    ?INFO_MSG("~p(~p)~n", [Function, LoggedArgs]),
    CallArgs = [State|Args],
    erlang:apply(?MODULE, Function, CallArgs).

%% template functions
Expand All @@ -13,51 +14,97 @@ fetch(State, Key, {Method, Url}) when Method==options;Method==get;Method==head;M
fetch(State, Key, {Method, Url, Headers}) when Method==options;Method==get;Method==head;Method==delete;Method==trace ->
fetch(State, Key, {Method, Url, Headers, []});
fetch(State, Key, {Method, Url, Headers, Body}) ->
Response = ex_web:request(Method, Url, Headers, Body),
Url1 = lists:flatten([begin
case I of
String when is_list(String) -> String;
Atom when is_atom(Atom) -> to_string(?FETCH(State, Atom));
Other -> Other
end || I <- Url]),
io:format("url: ~p~n", [Url1]),
Response = ex_web:request(Method, Url1, Headers, Body),
?STORE(State, Key, Response).

assign(State, Key, Term) ->
?STORE(State, Key, evaluate(State, Term)).
?STORE(State, Key, compute(State, Term)).

assert(State, Key, Assertion) ->
assert_true(?FETCH(State, Key), Assertion),

commit(State, _Key, _Value) ->
commit(State, Key, Value) ->
%Key1 = evaluate(State, Key),
Value1 = evaluate(State, Value),
io:format("commit ~p:~p~n", [Key, Value1]),
%% commit Key/Value to CouchDB or some disk-based key/value store

commit(State, Key, Value, {CallbackModule, CallbackFunction}) ->
%Key1 = evaluate(State, Key),
Value1 = evaluate(State, Value),
io:format("commit ~p:~p~n", [Key, Value1]),
spawn(CallbackModule, CallbackFunction, [Key, Value1]),

each(#state{stack=Stack, instructions=OldInstructions}=State, Key, Source, NewInstructions) ->
case ?FETCH(Source) of
Val when Val==undefined; Val==[] ->
each(#state{stack=Stack}=State, Key, Source, NewInstructions) ->
case ?FETCH(State, Source) of
{_Type, Val} when Val==undefined; Val==[] ->
exit({?MODULE, ?LINE, fetch_failed, Source, Val});
[Val] -> %% last item in each list
NewState = ?STORE(State, Key, Val),
{Type, [Val]} -> %% last item in list
NewState = ?STORE(State, Key, typify_value(Type, Val)),
[Val|Tail] ->
OldState0 = ?STORE(State, Source, Tail), %% insert list tail for source key
%% add this instruction to head of list so that when stack is popped
%% and this state begins processing again, the each list is the next instruction still
OldState1 = OldState0#state{instructions=[{instr, each, [Key, Source, NewInstructions]}|OldInstructions]},
NewState = ?STORE(State, Key, Val),
NewState#state{instructions=NewInstructions, stack=[OldState1|Stack]} %% push old state on to stack of new state
{Type, [Val|Tail]} ->
OldState = ?STORE(State, Source, {Type, Tail}), %% insert list tail for source key
NewState = ?STORE(State, Key, typify_value(Type, Val)), %% insert item from source
NewState#state{instructions=NewInstructions, stack=[OldState|Stack]} %% push old state on to stack of new state
%% next instruction processed will be first from Instructions

%% @doc Template function for the `configure` instruction.
%% Expands the ?CONFIGURE macro with State, Key and Value and returns
%% whatever that expansion evaluates to. The macro is defined outside
%% this view.
%% NOTE(review): presumably this returns the updated state like the other
%% template functions -- confirm against the macro definition.
configure(State, Key, Value) ->
?CONFIGURE(State, Key, Value).

function(State, Fun) when is_function(Fun) ->

print(State, Key) ->
error_logger:info_report({print, ?FETCH(State, Key)}),

%% @doc Template function for the `onfail` instruction.
%% Pushes the current state onto the stack and returns a state whose
%% instruction list is AttemptInstrs. The fail instructions are not used
%% here.
%% NOTE(review): the visible part of ex_engine appears to read the fail
%% instructions from the stacked state when an instruction exits --
%% confirm against the full engine source.
onfail(#state{stack=Stack}=State, AttemptInstrs, _FailInstr) when is_list(AttemptInstrs) ->
    %% the pre-attempt state is saved on top of the existing stack
    PushedStack = [State|Stack],
    State#state{instructions=AttemptInstrs, stack=PushedStack}.

%% internal functions
evaluate(State, {xpath, Source, XPath}) ->
compute(State, {xpath, Source, XPath}) ->
ex_xpath:run(XPath, ?FETCH(State, Source));
evaluate(State, {regexp, Source, Regexp}) ->
compute(State, {regexp, Source, Regexp}) ->
ex_re:run(Regexp, ?FETCH(State, Source)).

%% @doc Resolve a term against State (used on values handed to commit).
%%   - a tuple is resolved element by element and rebuilt with the same arity;
%%   - an atom is looked up with ?FETCH: `undefined` leaves the atom as it is,
%%     a two-element tuple yields its second element, and any other lookup
%%     result raises case_clause;
%%   - every other term is returned unchanged.
evaluate(State, Tuple) when is_tuple(Tuple) ->
    Elements = tuple_to_list(Tuple),
    Resolved = lists:map(fun(Element) -> evaluate(State, Element) end, Elements),
    list_to_tuple(Resolved);
evaluate(State, Key) when is_atom(Key) ->
    case ?FETCH(State, Key) of
        undefined ->
            Key;
        {_Type, Value} ->
            Value
    end;
evaluate(_State, Literal) ->
    Literal.

%% @doc Check a stored value against an assertion.
%% Returns `ok' when the assertion holds; otherwise exits with
%% {Module, assertion_failed, {Value, Assertion}}.
%%
%% Supported assertions:
%%   nil             - {nil, []} or {nil, undefined}
%%   string          - {string, S} with S a non-empty list, or an
%%                     http_response whose body is a non-empty list
%%   node            - any tuple
%%   list_of_strings - {list_of_strings, L}, every item asserted as string
%%   list_of_nodes   - {list_of_nodes, L}, every item asserted as node
%%   {status, S}     - an http_response whose status equals S
assert_true({nil, Key}, nil) when Key==[]; Key==undefined -> ok;
%% A string must be non-empty. An earlier clause that accepted every list
%% used to sit in front of this one and made it unreachable, so empty
%% strings passed the assertion; that clause is gone.
assert_true({string, Key}, string) when is_list(Key), Key =/= [] -> ok;
%% Any tuple counts as a node. This also covers {node, Tuple}, so no
%% separate clause is needed for the tagged form.
assert_true(Key, node) when is_tuple(Key) -> ok;
assert_true({list_of_strings, Key}, list_of_strings) when is_list(Key) ->
    %% lists:foreach/2 returns ok once every item has passed
    lists:foreach(fun(Item) -> assert_true(Item, string) end, Key);
assert_true({list_of_nodes, Key}, list_of_nodes) when is_list(Key) ->
    lists:foreach(fun(Item) -> assert_true(Item, node) end, Key);
assert_true({http_response, _S, _H, Body}, string) when is_list(Body), Body =/= [] -> ok;
%% Status is bound in both arguments, so this only matches on equal status
assert_true({http_response, Status, _H, _B}, {status, Status}) -> ok;
assert_true(Key, Assertion) -> exit({?MODULE, assertion_failed, {Key, Assertion}}).

%% @doc Tag a single item taken from a typed list.
%% Items of a list_of_strings become {string, _} and items of a
%% list_of_nodes become {node, _}. An item that already carries the
%% matching tag is returned as it is.
typify_value(list_of_strings, {string, _}=Tagged) ->
    Tagged;
typify_value(list_of_strings, Chars) when is_list(Chars) ->
    {string, Chars};
typify_value(list_of_nodes, {node, _}=Tagged) ->
    Tagged;
typify_value(list_of_nodes, Bare) ->
    {node, Bare}.

%% @doc Turn a value into a plain string (list).
%% Lists pass through untouched, {string, S} is unwrapped, and a
%% {node, _} is first handed to ex_xpath:reassemble/1 and the result is
%% converted again.
to_string(Chars) when is_list(Chars) ->
    Chars;
to_string({string, Chars}) ->
    Chars;
to_string({node, _}=TaggedNode) ->
    Reassembled = ex_xpath:reassemble(TaggedNode),
    to_string(Reassembled).
44 changes: 44 additions & 0 deletions src/ex_engine.erl
@@ -0,0 +1,44 @@


%% when a list of instructions is passed in, create a
%% new state and begin processing
run(Instructions) when is_list(Instructions) ->

%% finished running
run(#state{instructions=[], stack=[]}) ->

%% finished local instruction set, pop stack
run(#state{instructions=[], stack=[#state{instructions=[{instr, onfail, _}|Instrs]}=S|_]}) ->

run(#state{instructions=[], stack=[OldState|_]}) ->

run(#state{instructions=[Instr|Instrs], stack=Stack}=State) ->
NewState0 =
case keep_instruction(Instr) of
true -> State#state{instructions=[Instr|Instrs]};
false -> State#state{instructions=Instrs}
case catch ex_consumer:execute(Instr, NewState0) of
{'EXIT', Error} ->
case Stack of
[#state{instructions=[{instr, onfail, [_, FailInstrs]}|InstrsTail]}=OldState|StackTail] ->
NewStack = [OldState#state{instructions=InstrsTail}|StackTail],
NewState1 = #state{instructions=FailInstrs, stack=NewStack},
_ ->
exit(Error) %% perhaps someone else will enjoy this
NewState1 ->

%% internal functions
%% @doc Returns true for `each` and `onfail` instructions and false for
%% everything else. The caller uses the result to decide whether the
%% instruction stays at the head of the instruction list.
keep_instruction({instr, each, _}) ->
    true;
keep_instruction({instr, onfail, _}) ->
    true;
keep_instruction(_) ->
    false.
3 changes: 3 additions & 0 deletions src/ex_re.erl
Expand Up @@ -5,6 +5,9 @@
%% Regexp = {re_pattern, _, _, _}
%% Subject = {Type, Value}
%% Result = {nil, _} | {string, _} | {list_of_strings, _}
run(Regexp, {node, Subject}) when is_tuple(Regexp), is_tuple(Subject) ->
run(Regexp, ex_xpath:reassemble({node, Subject}));

run(Regexp, {string, Subject}) when is_tuple(Regexp), is_list(Subject) ->
case re:run(Subject, Regexp, [global]) of
nomatch ->
4 changes: 2 additions & 2 deletions src/ex_web.erl
Expand Up @@ -17,13 +17,13 @@ request(Method, Url, [], Body) -> request(Method, Url, ?HEADERS, Body);
request(Method, Url, Headers, [])
when Method==options;Method==get;Method==head;Method==delete;Method==trace ->
case http:request(Method, {Url, Headers}, [], []) of
{ok, Response} -> {string, Response};
{ok, {{_,RspStatus,_}, RspHeaders, RspBody}} -> {http_response, RspStatus, RspHeaders, RspBody};
{error, Reason} -> exit({?MODULE, ?LINE, Reason})

request(Method, Url, Headers, Body)
when Method == post; Method == put ->
case http:request(Method, {Url, Headers, "text/html", Body}, [], []) of
{ok, Response} -> {string, Response};
{ok, {{_,RspStatus,_}, RspHeaders, RspBody}} -> {http_response, RspStatus, RspHeaders, RspBody};
{error, Reason} -> exit({?MODULE, ?LINE, Reason})
10 changes: 8 additions & 2 deletions src/ex_xpath.erl
@@ -1,11 +1,14 @@
-export([run/2, reassemble/1]).

%% @spec run(XPath, Subject) -> Result
%% XPath = string()
%% Subject = {Type, Value}
%% Result = {nil, []} | {node, _} | {list_of_nodes, _} | {string, _} | {list_of_strings, _}
run(XPath, {http_response, _, _, Body}) ->
run(XPath, {string, Body});

run(XPath, {string, Subject0}) when is_list(XPath), is_list(Subject0) ->
case mochiweb_html:parse(Subject0) of
Subject when is_tuple(Subject) ->
Expand All @@ -32,4 +35,7 @@ run(XPath, {node, Subject}) when is_list(XPath), is_tuple(Subject) ->
{list_of_strings, [binary_to_list(Bin) || Bin <- List]};
_ ->
exit({?MODULE, ?LINE, XPath, Subject})

%% @doc Render a {node, Node} back to markup with mochiweb_html:to_html/1
%% and return it as {string, String}, where String is a list.
reassemble({node, Node}) ->
    Html = mochiweb_html:to_html(Node),
    HtmlBin = iolist_to_binary(Html),
    {string, binary_to_list(HtmlBin)}.
9 changes: 9 additions & 0 deletions t/Makefile
@@ -0,0 +1,9 @@
include ../support/

all: $(EBIN_FILES)


rm -rf $(EBIN_FILES) erl_crash.dump
51 changes: 51 additions & 0 deletions t/excavator_t_004.t
@@ -0,0 +1,51 @@
%% -*- erlang -*-
%%! -pa ebin -sasl errlog_type error -boot start_sasl -noshell

main(_) ->
case (catch start()) of
{'EXIT', Err} ->
io:format("# ~p~n", [Err]),
_ ->

start() ->
Instrs =
[ {instr, configure, [qps, 10]},
{instr, fetch, [artist_page, {get, ""}]},
{instr, assert, [artist_page, {status, 200}]},
{instr, assert, [artist_page, string]},
{instr, assign, [albums, {xpath, artist_page, "//div[@class='album-meta-data-wrapper']"}]},
{instr, assert, [albums, list_of_nodes]},
{instr, each, [album, albums, [
{instr, assign, [album_href, {xpath, album, "//a[1]/@href"}]},
{instr, assign, [album_id, {regexp, album_href, compile_re("tui_id=(.*)tui")}]},
{instr, assert, [album_id, string]},
{instr, fetch, [album_page, {get, ["", album_id, ".html"]}]},
{instr, onfail, [
[ {instr, assert, [album_page, {status, 200}]},
{instr, assert, [album_page, string]},
{instr, assign, [album_name_node, {xpath, album_page, "//div[@class='album-name']"}]},
{instr, assert, [album_name_node, node]},
{instr, assign, [album_name, {regexp, album_name_node, compile_re(" &gt; (.*)</div>")}]},
{instr, assert, [album_name, string]},
{instr, commit, [{album, beatles}, {album_id, album_name}]},
{instr, print, [album_name]}
[{instr, function, [fun(S) -> io:format("This shit fAiLeD~n") end]}]



%% Compile Regexp with re:compile/1 and return the compiled pattern.
%% A pattern that does not compile raises badmatch.
compile_re(Regexp) ->
    {ok, Compiled} = re:compile(Regexp),
    Compiled.
24 changes: 24 additions & 0 deletions t/test_server.erl
@@ -0,0 +1,24 @@


%% web_server callbacks

%% Start web_server with this module and a fixed option list: an empty
%% address string and port 8888. Returns whatever web_server:start/2
%% returns.
start_link() ->
    Options = [{address, ""}, {port, 8888}],
    web_server:start(?MODULE, Options).

%% web_server callbacks

%% Function: dispatch(Req, PathTokens) -> {reply, Status, Headers, Body} |
%% {reply, Module, Function, Args} |
%% undefined
%% Req = mochiweb_request()
%% PathTokens = list()
dispatch(_, _) ->

