Skip to content

Commit

Permalink
Make it possible to preview scheduled requests in the UI
Browse files Browse the repository at this point in the history
  • Loading branch information
oltarasenko committed Mar 15, 2023
1 parent 3b05258 commit a5d34e7
Show file tree
Hide file tree
Showing 7 changed files with 144 additions and 36 deletions.
35 changes: 33 additions & 2 deletions lib/crawly/api.ex
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,7 @@ defmodule Crawly.API.Router do
end
)

filename = Path.join(:code.priv_dir(:crawly), "index.html.eex")
response = EEx.eval_file(filename, data: spiders_list)
response = render_template("list.html.eex", data: spiders_list)
send_resp(conn, 200, response)
end

Expand All @@ -68,6 +67,29 @@ defmodule Crawly.API.Router do
send_resp(conn, 200, msg)
end

# Renders an HTML preview of the requests currently scheduled for a spider.
#
# `spider_name` comes straight from the URL path, so it is untrusted input:
#
#   * it is resolved with `String.to_existing_atom/1`, because atoms are never
#     garbage collected and `String.to_atom/1` would let a client create a new
#     atom with every request. A spider that has a storage worker is keyed by
#     its atom, so that atom necessarily exists already;
#   * every value interpolated into the page is HTML-escaped, since EEx does
#     not escape `<%= %>` output on its own (request URLs and headers come
#     from crawled pages).
#
# An unknown spider, or one without a running storage worker, yields an empty
# list with a 200 response.
get "/spiders/:spider_name/requests" do
  module_name = "Elixir.#{spider_name}"

  spider =
    try do
      String.to_existing_atom(module_name)
    rescue
      # No such atom: nothing can be registered under this name.
      ArgumentError -> nil
    end

  scheduled =
    case spider && Crawly.RequestsStorage.requests(spider) do
      {:requests, requests} -> requests
      {:error, _reason} -> []
      nil -> []
    end

  result =
    Enum.map(scheduled, fn req ->
      %{
        url: req.url |> to_string() |> Plug.HTML.html_escape(),
        headers: req.headers |> inspect() |> Plug.HTML.html_escape()
      }
    end)

  response =
    render_template("requests_list.html.eex",
      requests: result,
      # Same text the atom would have rendered as, but escaped.
      spider_name: Plug.HTML.html_escape(module_name)
    )

  send_resp(conn, 200, response)
end

get "/spiders/:spider_name/schedule" do
spider_name = String.to_atom("Elixir.#{spider_name}")
result = Crawly.Engine.start_spider(spider_name)
Expand Down Expand Up @@ -125,4 +147,13 @@ defmodule Crawly.API.Router do
# Fallback route: replies with a plain-text 404.
match _, do: send_resp(conn, 404, "Oops! Page not found!")

# Evaluates the EEx file `template_name` from the application's priv
# directory with `assigns`, then embeds the result into "index.html.eex"
# through its `rendered_template` binding and returns the final page.
defp render_template(template_name, assigns) do
  priv_dir = :code.priv_dir(:crawly)

  page_body =
    priv_dir
    |> Path.join(template_name)
    |> EEx.eval_file(assigns)

  priv_dir
  |> Path.join("index.html.eex")
  |> EEx.eval_file(rendered_template: page_body)
end
end
19 changes: 19 additions & 0 deletions lib/crawly/requests_storage/requests_storage.ex
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ defmodule Crawly.RequestsStorage do
GenServer.call(__MODULE__, {:stats, spider_name})
end

@doc """
Returns the requests currently scheduled for the given spider.

Replies with `{:requests, requests}` when a storage worker is registered for
`spider_name`, and with `{:error, :storage_worker_not_running}` otherwise.
"""
@spec requests(atom()) ::
        {:requests, [Crawly.Request.t()]}
        | {:error, :storage_worker_not_running}
def requests(spider_name) do
  GenServer.call(__MODULE__, {:requests, spider_name})
end

@doc """
Starts a worker for a given spider
"""
Expand Down Expand Up @@ -130,6 +136,19 @@ defmodule Crawly.RequestsStorage do
{:reply, msg, state}
end

# Forwards a `{:requests, spider_name}` call to the storage worker registered
# for that spider, or replies with an error tuple when there is none.
def handle_call({:requests, spider_name}, _from, state) do
  worker = Map.get(state.workers, spider_name)

  reply =
    if is_nil(worker) do
      {:error, :storage_worker_not_running}
    else
      Crawly.RequestsStorage.Worker.requests(worker)
    end

  {:reply, reply, state}
end

def handle_call({:start_worker, spider_name, crawl_id}, _from, state) do
{msg, new_state} =
case Map.get(state.workers, spider_name) do
Expand Down
10 changes: 10 additions & 0 deletions lib/crawly/requests_storage/requests_storage_worker.ex
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,12 @@ defmodule Crawly.RequestsStorage.Worker do
do_call(pid, :stats)
end

@doc """
Returns the requests currently held by the worker process `pid`.

Meant for read-only previews of the queue: the worker's state is left
untouched.
"""
@spec requests(pid()) :: {:requests, [Crawly.Request.t()]}
def requests(pid) do
  do_call(pid, :requests)
end

# Starts a linked worker process, handing `[spider_name, crawl_id]` to the
# GenServer as its init argument.
def start_link(spider_name, crawl_id) do
  init_args = [spider_name, crawl_id]
  GenServer.start_link(__MODULE__, init_args)
end
Expand Down Expand Up @@ -81,6 +87,10 @@ defmodule Crawly.RequestsStorage.Worker do
{:reply, {:stored_requests, state.count}, state}
end

# Replies with every request currently kept in the worker state; the state
# itself is returned unchanged.
def handle_call(:requests, _from, state) do
  pending = state.requests
  {:reply, {:requests, pending}, state}
end

defp do_call(pid, command) do
GenServer.call(pid, command)
catch
Expand Down
36 changes: 2 additions & 34 deletions priv/index.html.eex
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<!DOCTYPE html>
<html>
<head>

<meta http-equiv="refresh" content="10">
<script type="text/javascript">
function get(name, link) {
var xhttp = new XMLHttpRequest();
Expand Down Expand Up @@ -108,38 +108,6 @@
<div class="header">
<h1>Crawly Management Tool</h1>
</div>
<div class="row">
<div id="status"></div>
<div class="leftcolumn">
<div class="card">
<h2>Spiders</h2>
<table>
<tr>
<th>Spider name</td>
<th>State</td>
<th>Items scraped</td>
<th>Scheduled Requests</td>
<th>Command</td>
</tr>
<%= for spider <- data do %>
<tr>
<td><%= spider.name %></td>
<td><%= spider.state %></td>
<td> <%= spider.scraped %> </td>
<td> <%= spider.scheduled %></td>
<%= if spider.state == :idle do %>
<td> <input type = "button" onclick = "schedule('<%= spider.name %>')" value = "Schedule"> </td>
<% else %>
<td> <input type = "button" onclick = "stop('<%= spider.name %>')" value = "Stop"> </td>
<% end %>

</tr>
<% end %>
</table>
</div>
</div>
<div class="rightcolumn">
</div>
</div>
<%= rendered_template %>
</body>
</html>
33 changes: 33 additions & 0 deletions priv/list.html.eex
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
<!-- Spiders overview: one table row per spider with its state, counters and a Schedule/Stop button. -->
<div class="row">
  <div id="status"></div>
  <div class="leftcolumn">
    <div class="card">
      <h2>Spiders</h2>
      <table>
        <tr>
          <th>Spider name</th>
          <th>State</th>
          <th>Items scraped</th>
          <th>Scheduled Requests</th>
          <th>Command</th>
        </tr>
        <%= for spider <- data do %>
          <tr>
            <td><%= spider.name %></td>
            <td><%= spider.state %></td>
            <td> <%= spider.scraped %> </td>
            <!-- The scheduled count links to the per-spider requests preview. -->
            <td><a href="/spiders/<%= spider.name %>/requests"><%= spider.scheduled %></a></td>
            <%= if spider.state == :idle do %>
              <td> <input type = "button" onclick = "schedule('<%= spider.name %>')" value = "Schedule"> </td>
            <% else %>
              <td> <input type = "button" onclick = "stop('<%= spider.name %>')" value = "Stop"> </td>
            <% end %>
          </tr>
        <% end %>
      </table>
    </div>
  </div>
  <div class="rightcolumn">
  </div>
</div>
25 changes: 25 additions & 0 deletions priv/requests_list.html.eex
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
<!-- Preview of the requests scheduled for one spider: one table row per request. -->
<div class="row">
  <div id="status"></div>
  <div class="leftcolumn">
    <div class="card">
      <h3>
        List of scheduled requests for: <%= spider_name %>
        <a href="/">Back</a>
      </h3>
      <table>
        <tr>
          <th>url</th>
          <th>headers</th>
        </tr>
        <%= for req <- requests do %>
          <tr>
            <td><%= req.url %></td>
            <td><%= req.headers %></td>
          </tr>
        <% end %>
      </table>
    </div>
  </div>
  <div class="rightcolumn">
  </div>
</div>
22 changes: 22 additions & 0 deletions test/request_storage_test.exs
Original file line number Diff line number Diff line change
Expand Up @@ -109,4 +109,26 @@ defmodule RequestStorageTest do
{:stored_requests, num} = Crawly.RequestsStorage.stats(context.crawler)
assert 0 == num
end

test "Can get requests list from the requests storage", %{crawler: crawler} do
  expected = %Crawly.Request{url: "http://example.com", headers: [], options: []}

  :ok = Crawly.RequestsStorage.store(crawler, expected)

  # Exactly one request must come back, equal to the one that was stored.
  {:requests, [stored]} = Crawly.RequestsStorage.requests(crawler)
  assert expected == stored
end

test "Getting requests list from the requests storage if nothing is there",
     %{crawler: crawler} do
  # Nothing is stored in this test, so the list is expected to be empty.
  assert {:requests, []} == Crawly.RequestsStorage.requests(crawler)
end
end

0 comments on commit a5d34e7

Please sign in to comment.