Skip to content

Commit

Permalink
[add] API to Crawly.Manager (#118)
Browse files Browse the repository at this point in the history
* [add] an API to Crawly.Manager

Co-authored-by: oltarasenko <oltarasenko@gmail.com>
  • Loading branch information
filipevarjao and oltarasenko committed Aug 31, 2020
1 parent 0bf308c commit 30656b6
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 6 deletions.
30 changes: 30 additions & 0 deletions lib/crawly/engine.ex
Expand Up @@ -21,6 +21,23 @@ defmodule Crawly.Engine do
GenServer.call(__MODULE__, {:start_spider, spider_name})
end

@spec get_manager(module()) ::
pid() | {:error, :spider_not_found}
def get_manager(spider_name) do
case Map.fetch(running_spiders(), spider_name) do
:error ->
{:error, :spider_not_found}

{:ok, pid_sup} ->
Supervisor.which_children(pid_sup)
|> Enum.find(&({Crawly.Manager, _, :worker, [Crawly.Manager]} = &1))
|> case do
nil -> {:error, :spider_not_found}
{_, pid, :worker, _} -> pid
end
end
end

@spec stop_spider(module(), reason) :: result
when reason: :itemcount_limit | :itemcount_timeout | atom(),
result: :ok | {:error, :spider_not_running}
Expand All @@ -47,6 +64,19 @@ defmodule Crawly.Engine do
{:ok, %Crawly.Engine{}}
end

def handle_call({:get_manager, spider_name}, _, state) do
pid =
case Map.get(state.started_spiders, spider_name) do
nil ->
{:error, :spider_not_found}

pid ->
pid
end

{:reply, pid, state}
end

def handle_call(:running_spiders, _from, state) do
{:reply, state.started_spiders, state}
end
Expand Down
35 changes: 30 additions & 5 deletions lib/crawly/manager.ex
Expand Up @@ -31,13 +31,26 @@ defmodule Crawly.Manager do

use GenServer

alias Crawly.Utils
alias Crawly.{Engine, Utils}

@spec add_workers(module(), non_neg_integer()) ::
:ok | {:error, :spider_non_exist}
def add_workers(spider_name, num_of_workers) do
case Engine.get_manager(spider_name) do
{:error, reason} ->
{:error, reason}

pid ->
GenServer.cast(pid, {:add_workers, num_of_workers})
end
end

def start_link(spider_name) do
Logger.debug("Starting the manager for #{spider_name}")
GenServer.start_link(__MODULE__, spider_name)
end

@impl true
def init(spider_name) do
# Getting spider start urls
[start_urls: urls] = spider_name.init()
Expand Down Expand Up @@ -83,6 +96,18 @@ defmodule Crawly.Manager do
%{name: spider_name, tref: tref, prev_scraped_cnt: 0, workers: worker_pids}}
end

@impl true
def handle_cast({:add_workers, num_of_workers}, state) do
Logger.info("Adding #{num_of_workers} workers for #{state.name}")

Enum.each(1..num_of_workers, fn _ ->
DynamicSupervisor.start_child(state.name, {Crawly.Worker, [state.name]})
end)

{:noreply, state}
end

@impl true
def handle_info(:operations, state) do
Process.cancel_timer(state.tref)

Expand All @@ -109,11 +134,11 @@ defmodule Crawly.Manager do
|> Utils.get_settings(state.name)
|> maybe_convert_to_integer()

maybe_stop_spider_by_timeout(
state.name,
maybe_stop_spider_by_timeout(
state.name,
delta,
closespider_timeout_limit
)
closespider_timeout_limit
)

tref =
Process.send_after(
Expand Down
25 changes: 24 additions & 1 deletion test/manager_test.exs
Expand Up @@ -26,6 +26,30 @@ defmodule ManagerTest do
end)
end

test "it is possible to add more workers to a spider" do
spider_name = Manager.TestSpider
:ok = Crawly.Engine.start_spider(spider_name)
initial_number_of_workers = 1

assert initial_number_of_workers ==
DynamicSupervisor.count_children(spider_name)[:workers]

workers = 2
assert :ok == Crawly.Manager.add_workers(spider_name, workers)

pid = Crawly.Engine.get_manager(spider_name)
state = :sys.get_state(pid)
assert spider_name == state.name

assert initial_number_of_workers + workers ==
DynamicSupervisor.count_children(spider_name)[:workers]
end

test "returns error when spider doesn't exist" do
assert {:error, :spider_not_found} ==
Crawly.Manager.add_workers(Manager.NonExistentSpider, 2)
end

test "max request per minute is respected" do
:ok = Crawly.Engine.start_spider(Manager.TestSpider)

Expand Down Expand Up @@ -53,7 +77,6 @@ defmodule ManagerTest do
end

test "Closespider timeout is respected" do

Process.register(self(), :spider_closed_callback_test)

# Ignore closespider_itemcount
Expand Down

0 comments on commit 30656b6

Please sign in to comment.