File logging improvements (#170)
* Make file logging optional

* Set default value for enable_file_logging to false

* Update documentation

* Change config name

* Code improvement

* Update documentation

* Add debug log for log file path

* Refined log configuration, added log tests

Co-authored-by: TzeYiing <ty@tzeyiing.com>
oshosanya and Ziinc committed Feb 3, 2021
1 parent 1ec03d7 commit 7e09e53
Showing 4 changed files with 63 additions and 5 deletions.
1 change: 1 addition & 0 deletions config/config.exs
@@ -45,6 +45,7 @@ config :crawly,

# TODO: this looks outdated
follow_redirect: true,
log_to_file: false,

# Request middlewares
middlewares: [
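The new flag defaults to off, so existing setups keep their behaviour. A minimal sketch of a global config that opts in, assuming a standard Mix config file (the `log_dir` value is illustrative, not part of this commit):

```elixir
# config/config.exs — illustrative sketch only
import Config

config :crawly,
  # opt in to per-spider log files (defaults to false)
  log_to_file: true,
  # directory under which each spider's log file is written
  log_dir: "/var/log/crawly"
```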
8 changes: 7 additions & 1 deletion documentation/configuration.md
@@ -133,7 +133,13 @@ Allows to specify a custom HTTP client which will be performing request to the c
default: /tmp

Set spider logs directory. All spiders have their own dedicated log file
stored under the `log_dir` folder.
stored under the `log_dir` folder. This option is ignored if `log_to_file` is not set to `true`.

### log_to_file :: boolean()

default: false

Enables or disables file logging.
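
Because this is a regular Crawly setting, it can also be switched on for a single spider through the `override_settings/0` callback, which is the same mechanism the new test below relies on. A hedged sketch with a hypothetical spider module:

```elixir
defmodule MySpider do
  use Crawly.Spider

  # Enable file logging for this spider only; all other spiders
  # keep the global setting (default: false).
  def override_settings(), do: [log_to_file: true, log_dir: System.tmp_dir()]

  def base_url(), do: "https://example.com"
  def init(), do: [start_urls: ["https://example.com"]]
  def parse_item(_response), do: %{items: [], requests: []}
end
```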

### port :: pos_integer()

28 changes: 24 additions & 4 deletions lib/crawly/engine.ex
@@ -61,7 +61,9 @@ defmodule Crawly.Engine do
|> Map.put_new_lazy(:crawl_id, &UUID.uuid1/0)

# Filter all logs related to a given spider
set_spider_log(spider_name, opts[:crawl_id])
if Crawly.Utils.get_settings(:log_to_file, spider_name) do
configure_spider_logs(spider_name, opts[:crawl_id])
end

GenServer.call(
__MODULE__,
@@ -240,14 +242,32 @@
|> Enum.dedup_by(& &1)
end

defp set_spider_log(spider_name, crawl_id) do
log_dir = Crawly.Utils.get_settings(:log_dir, spider_name, "/tmp")
defp configure_spider_logs(spider_name, crawl_id) do
log_dir =
Crawly.Utils.get_settings(
:log_dir,
spider_name,
System.tmp_dir()
)

current_unix_timestamp = :os.system_time(:second)

Logger.add_backend({LoggerFileBackend, :debug})

log_file_path =
Path.join([
log_dir,
inspect(spider_name),
# underscore separates the timestamp and the crawl_id
inspect(current_unix_timestamp) <> "_" <> crawl_id
]) <> ".log"

Logger.configure_backend({LoggerFileBackend, :debug},
path: "/#{log_dir}/#{spider_name}/#{crawl_id}.log",
path: log_file_path,
level: :debug,
metadata_filter: [crawl_id: crawl_id]
)

Logger.debug("Writing logs to #{log_file_path}")
end
end
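
Taken together, the new helper writes one file per crawl, named `<unix timestamp>_<crawl_id>.log` inside a per-spider folder; it also drops the hard-coded leading slash the old `"/#{log_dir}/..."` interpolation produced. A standalone sketch of the resulting path, using the same calls as the diff (spider name and crawl id are made up):

```elixir
log_dir = System.tmp_dir()            # e.g. "/tmp"
spider_name = MySpider                # hypothetical spider module
crawl_id = "hypothetical-crawl-id"    # the real value comes from UUID.uuid1/0
timestamp = :os.system_time(:second)  # e.g. 1612345678

Path.join([log_dir, inspect(spider_name), inspect(timestamp) <> "_" <> crawl_id]) <> ".log"
# => "/tmp/MySpider/1612345678_hypothetical-crawl-id.log"
```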
31 changes: 31 additions & 0 deletions test/engine_test.exs
@@ -1,6 +1,17 @@
defmodule EngineTest do
use ExUnit.Case

setup do
on_exit(fn ->
:meck.unload()

Crawly.Engine.list_known_spiders()
|> Enum.each(fn s ->
Crawly.Engine.stop_spider(s)
end)
end)
end

test "list_known_spiders/0 lists all spiders and their current status in the engine" do
Crawly.Engine.init([])
Crawly.Engine.refresh_spider_list()
@@ -24,4 +35,24 @@ defmodule EngineTest do
spiders = Crawly.Engine.list_known_spiders()
assert Enum.all?(spiders, fn s -> s.status == :stopped end)
end

test ":log_to_file allows for logging to log file" do
:meck.expect(TestSpider, :override_settings, fn ->
[log_dir: "/my_tmp_dir", log_to_file: true]
end)

:meck.expect(Logger, :configure_backend, fn {_, :debug}, opts ->
log_file_path = Keyword.get(opts, :path)
assert log_file_path =~ "TestSpider"
assert log_file_path =~ "/my_tmp_dir"
end)

Crawly.Engine.init([])
Crawly.Engine.refresh_spider_list()

# test a started spider
Crawly.Engine.start_spider(TestSpider)

assert :meck.num_calls(Logger, :configure_backend, :_) == 1
end
end
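
One detail the test does not exercise: the backend is configured with `metadata_filter: [crawl_id: crawl_id]`, so only log entries carrying the matching `crawl_id` metadata reach the file. Assuming the standard `Logger` metadata mechanism, spider-side code would tag its entries roughly like this sketch:

```elixir
require Logger

crawl_id = "hypothetical-crawl-id"

# Attach the crawl id to the current process's Logger metadata so the
# backend's `metadata_filter: [crawl_id: crawl_id]` matches these entries.
Logger.metadata(crawl_id: crawl_id)
Logger.debug("Fetched a page")
```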
