Skip to content

Commit

Permalink
[add] manager api
Browse files Browse the repository at this point in the history
  • Loading branch information
filipevarjao committed Jul 10, 2020
1 parent 6a94c23 commit 0131448
Show file tree
Hide file tree
Showing 3 changed files with 112 additions and 17 deletions.
17 changes: 15 additions & 2 deletions lib/crawly/engine.ex
Original file line number Diff line number Diff line change
Expand Up @@ -22,11 +22,11 @@ defmodule Crawly.Engine do
end

@spec get_manager(module()) ::
pid() | {:error, :spider_non_exist} | {:error, :spider_not_found}
pid() | {:error, :spider_not_found}
def get_manager(spider_name) do
case Map.fetch(running_spiders(), spider_name) do
:error ->
{:error, :spider_non_exist}
{:error, :spider_not_found}

{:ok, pid_sup} ->
Supervisor.which_children(pid_sup)
Expand Down Expand Up @@ -64,6 +64,19 @@ defmodule Crawly.Engine do
{:ok, %Crawly.Engine{}}
end

# Looks up the value stored for `spider_name` in `state.started_spiders`.
#
# Replies with that value, or with `{:error, :spider_not_found}` when the
# lookup yields `nil`. The server state is returned unchanged.
def handle_call({:get_manager, spider_name}, _from, state) do
  lookup = Map.get(state.started_spiders, spider_name)

  reply =
    if is_nil(lookup) do
      {:error, :spider_not_found}
    else
      lookup
    end

  {:reply, reply, state}
end

# Replies with the complete `started_spiders` value held in the server state;
# the state itself is not modified.
def handle_call(:running_spiders, _from, state) do
  spiders = state.started_spiders
  {:reply, spiders, state}
end
Expand Down
14 changes: 13 additions & 1 deletion lib/crawly/manager.ex
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,19 @@ defmodule Crawly.Manager do

use GenServer

alias Crawly.Utils
alias Crawly.{Engine, Utils}

@doc """
Asks the manager of a running spider to start additional workers.

The manager is resolved through `Crawly.Engine.get_manager/1`. When one is
found, `{:add_workers, num_of_workers}` is cast to it; because this is a cast,
`:ok` only means the request was sent, not that it has been processed yet.

Any `{:error, reason}` returned by the engine is passed through unchanged,
e.g. `{:error, :spider_not_found}` when the spider is not running.
"""
@spec add_workers(module(), non_neg_integer()) ::
        :ok | {:error, :spider_not_found}
def add_workers(spider_name, num_of_workers) do
  case Engine.get_manager(spider_name) do
    {:error, _reason} = error ->
      error

    pid ->
      GenServer.cast(pid, {:add_workers, num_of_workers})
  end
end

def start_link(spider_name) do
Logger.debug("Starting the manager for #{spider_name}")
Expand Down
98 changes: 84 additions & 14 deletions test/manager_test.exs
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
defmodule ManagerTest do
use ExUnit.Case, async: false

alias Features.Manager.TestSpider

setup do
Application.put_env(:crawly, :concurrent_requests_per_domain, 1)
Application.put_env(:crawly, :closespider_itemcount, 10)
Expand All @@ -20,25 +18,49 @@ defmodule ManagerTest do

on_exit(fn ->
:meck.unload()
Crawly.Engine.stop_spider(TestSpider)
Crawly.Engine.stop_spider(Manager.TestSpider)
Application.put_env(:crawly, :manager_operations_timeout, 30_000)
Application.put_env(:crawly, :concurrent_requests_per_domain, 1)
Application.put_env(:crawly, :closespider_timeout, 20)
Application.put_env(:crawly, :closespider_itemcount, 100)
end)
end

test "it is possible to add more workers to a spider" do
  spider = Manager.TestSpider
  workers_at_start = 1
  extra_workers = 2

  # Reads the current worker count from the supervisor registered under the
  # spider's name.
  count_workers = fn -> DynamicSupervisor.count_children(spider)[:workers] end

  :ok = Crawly.Engine.start_spider(spider)

  # NOTE(review): the single initial worker presumably follows from
  # `concurrent_requests_per_domain: 1` in setup - confirm.
  assert workers_at_start == count_workers.()

  assert :ok == Crawly.Manager.add_workers(spider, extra_workers)

  # add_workers/2 only casts to the manager. :sys.get_state/1 is a synchronous
  # call sent afterwards from this same process, so it is handled after the
  # cast and doubles as a check that we are talking to the right manager.
  manager = Crawly.Engine.get_manager(spider)
  manager_state = :sys.get_state(manager)
  assert spider == manager_state.name

  assert workers_at_start + extra_workers == count_workers.()
end

test "returns error when spider doesn't exist" do
  # Crawly.Engine.get_manager/1 reports an unknown spider as
  # {:error, :spider_not_found}, and add_workers/2 returns that tuple as-is.
  assert {:error, :spider_not_found} ==
           Crawly.Manager.add_workers(Manager.NonExistentSpider, 2)
end

# Starts the spider, expects exactly one stored request, waits briefly,
# expects exactly one stored item, then stops the spider and checks that the
# engine reports no running spiders.
# NOTE(review): this view appears to interleave pre- and post-commit lines
# (TestSpider vs Manager.TestSpider) - only one of each pair should exist in
# the real file; confirm against the repository.
test "max request per minute is respected" do
:ok = Crawly.Engine.start_spider(TestSpider)
:ok = Crawly.Engine.start_spider(Manager.TestSpider)

{:stored_requests, num} = Crawly.RequestsStorage.stats(TestSpider)
{:stored_requests, num} = Crawly.RequestsStorage.stats(Manager.TestSpider)
assert num == 1
# 1_00 is the integer 100, i.e. a 100 ms pause before checking stored items.
Process.sleep(1_00)

{:stored_items, num} = Crawly.DataStorage.stats(TestSpider)
{:stored_items, num} = Crawly.DataStorage.stats(Manager.TestSpider)
assert num == 1

:ok = Crawly.Engine.stop_spider(TestSpider)
:ok = Crawly.Engine.stop_spider(Manager.TestSpider)
# Compared with ==, so this passes only when the map of running spiders is empty.
assert %{} == Crawly.Engine.running_spiders()
end

Expand All @@ -47,15 +69,14 @@ defmodule ManagerTest do

Application.put_env(:crawly, :manager_operations_timeout, 50)
Application.put_env(:crawly, :closespider_itemcount, 1)
:ok = Crawly.Engine.start_spider(TestSpider)
:ok = Crawly.Engine.start_spider(Manager.TestSpider)

assert_receive :itemcount_timeout

assert %{} == Crawly.Engine.running_spiders()
end

test "Closespider timeout is respected" do

Process.register(self(), :spider_closed_callback_test)

# Ignore closespider_itemcount
Expand All @@ -64,24 +85,73 @@ defmodule ManagerTest do
Application.put_env(:crawly, :closespider_timeout, 10)

Application.put_env(:crawly, :manager_operations_timeout, 50)
:ok = Crawly.Engine.start_spider(TestSpider)
:ok = Crawly.Engine.start_spider(Manager.TestSpider)

assert_receive :itemcount_timeout
assert %{} == Crawly.Engine.running_spiders()
end

# A start request for a spider that is already running must be rejected with
# {:error, :spider_already_started} instead of starting a second instance.
test "Can't start already started spider" do
:ok = Crawly.Engine.start_spider(TestSpider)
:ok = Crawly.Engine.start_spider(Manager.TestSpider)

assert {:error, :spider_already_started} ==
Crawly.Engine.start_spider(TestSpider)
Crawly.Engine.start_spider(Manager.TestSpider)
end

# Stops the spider with the :manual_stop reason and expects that reason to
# arrive as a message in the test process.
test "Spider closed callback is called when spider is stopped" do
# Register this process under the name the test spider sends the stop reason to.
Process.register(self(), :spider_closed_callback_test)
:ok = Crawly.Engine.start_spider(TestSpider)
:ok = Crawly.Engine.stop_spider(TestSpider, :manual_stop)
:ok = Crawly.Engine.start_spider(Manager.TestSpider)
:ok = Crawly.Engine.stop_spider(Manager.TestSpider, :manual_stop)

assert_receive :manual_stop
end
end

# Minimal spider used by ManagerTest.
#
# Each parsed response yields one item and one follow-up request built from a
# random path. Stop reasons are forwarded to the process registered as
# `:spider_closed_callback_test`, when such a process exists.
defmodule Manager.TestSpider do
  use Crawly.Spider

  # Name under which a test process registers itself to receive stop reasons.
  @test_process :spider_closed_callback_test

  # Installs a spider-closed callback that forwards the stop reason to the
  # registered test process.
  def override_settings() do
    on_spider_closed_callback = fn reason -> notify_test_process(reason) end

    [on_spider_closed_callback: on_spider_closed_callback]
  end

  def base_url() do
    "https://www.example.com"
  end

  def init() do
    [
      start_urls: ["https://www.example.com/blog.html"]
    ]
  end

  # Ignores the response content and returns one item plus one new request,
  # both derived from a random number in 1..100.
  def parse_item(_response) do
    path = Enum.random(1..100)

    %{
      :items => [
        %{title: "t_#{path}", url: "example.com", author: "Me", time: "not set"}
      ],
      :requests => [
        Crawly.Utils.request_from_url("https://www.example.com/#{path}")
      ]
    }
  end

  # Forwards a manual stop to the test process; every other reason is ignored.
  def spider_closed(:manual_stop) do
    notify_test_process(:manual_stop)
  end

  def spider_closed(_) do
    :ignored
  end

  # Sends `reason` to the registered test process, if any.
  #
  # The name is resolved to a pid first and the message is sent to that pid:
  # send/2 to an unregistered *name* raises ArgumentError, whereas sending to
  # a pid never raises, even if the process has exited in the meantime.
  defp notify_test_process(reason) do
    case Process.whereis(@test_process) do
      nil ->
        :nothing_to_do

      pid ->
        send(pid, reason)
    end
  end
end

0 comments on commit 0131448

Please sign in to comment.