diff --git a/config/config.exs b/config/config.exs
index 75b985e6..9ef779f1 100644
--- a/config/config.exs
+++ b/config/config.exs
@@ -45,6 +45,7 @@ config :crawly,
 
   # TODO: this looks outdated
   follow_redirect: true,
+  log_to_file: false,
 
   # Request middlewares
   middlewares: [
diff --git a/documentation/configuration.md b/documentation/configuration.md
index bbf9be20..b9031293 100644
--- a/documentation/configuration.md
+++ b/documentation/configuration.md
@@ -133,7 +133,13 @@ Allows to specify a custom HTTP client which will be performing request to the c
 
 default: /tmp
 
 Set spider logs directory. All spiders have their own dedicated log file
-stored under the `log_dir` folder.
+stored under the `log_dir` folder. This option is ignored if `log_to_file` is not set to `true`.
+
+### log_to_file :: boolean()
+
+default: false
+
+Enables or disables file logging.
 
 ### port :: pos_integer()
diff --git a/lib/crawly/engine.ex b/lib/crawly/engine.ex
index 2ff3c13d..f264ac45 100644
--- a/lib/crawly/engine.ex
+++ b/lib/crawly/engine.ex
@@ -61,7 +61,9 @@ defmodule Crawly.Engine do
       |> Map.put_new_lazy(:crawl_id, &UUID.uuid1/0)
 
     # Filter all logs related to a given spider
-    set_spider_log(spider_name, opts[:crawl_id])
+    if Crawly.Utils.get_settings(:log_to_file, spider_name) do
+      configure_spider_logs(spider_name, opts[:crawl_id])
+    end
 
     GenServer.call(
       __MODULE__,
@@ -240,14 +242,32 @@
     |> Enum.dedup_by(& &1)
   end
 
-  defp set_spider_log(spider_name, crawl_id) do
-    log_dir = Crawly.Utils.get_settings(:log_dir, spider_name, "/tmp")
+  defp configure_spider_logs(spider_name, crawl_id) do
+    log_dir =
+      Crawly.Utils.get_settings(
+        :log_dir,
+        spider_name,
+        System.tmp_dir()
+      )
+
+    current_unix_timestamp = :os.system_time(:second)
+
     Logger.add_backend({LoggerFileBackend, :debug})
 
+    log_file_path =
+      Path.join([
+        log_dir,
+        inspect(spider_name),
+        # underscore separates the timestamp and the crawl_id
+        inspect(current_unix_timestamp) <> "_" <> crawl_id
+      ]) <> ".log"
+
     Logger.configure_backend({LoggerFileBackend, :debug},
-      path: "/#{log_dir}/#{spider_name}/#{crawl_id}.log",
+      path: log_file_path,
       level: :debug,
       metadata_filter: [crawl_id: crawl_id]
     )
+
+    Logger.debug("Writing logs to #{log_file_path}")
   end
 end
diff --git a/test/engine_test.exs b/test/engine_test.exs
index b5c1605b..a695c030 100644
--- a/test/engine_test.exs
+++ b/test/engine_test.exs
@@ -1,6 +1,17 @@
 defmodule EngineTest do
   use ExUnit.Case
 
+  setup do
+    on_exit(fn ->
+      :meck.unload()
+
+      Crawly.Engine.list_known_spiders()
+      |> Enum.each(fn s ->
+        Crawly.Engine.stop_spider(s)
+      end)
+    end)
+  end
+
   test "list_known_spiders/0 lists all spiders and their current status in the engine" do
     Crawly.Engine.init([])
     Crawly.Engine.refresh_spider_list()
@@ -24,4 +35,24 @@
     spiders = Crawly.Engine.list_known_spiders()
     assert Enum.all?(spiders, fn s -> s.status == :stopped end)
   end
+
+  test ":log_to_file allows for logging to log file" do
+    :meck.expect(TestSpider, :override_settings, fn ->
+      [log_dir: "/my_tmp_dir", log_to_file: true]
+    end)
+
+    :meck.expect(Logger, :configure_backend, fn {_, :debug}, opts ->
+      log_file_path = Keyword.get(opts, :path)
+      assert log_file_path =~ "TestSpider"
+      assert log_file_path =~ "/my_tmp_dir"
+    end)
+
+    Crawly.Engine.init([])
+    Crawly.Engine.refresh_spider_list()
+
+    # test a started spider
+    Crawly.Engine.start_spider(TestSpider)
+
+    assert :meck.num_calls(Logger, :configure_backend, :_) == 1
+  end
 end
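
Note for trying the patch out: with this diff applied, file logging is opt-in. Below is a minimal sketch of enabling it, assuming a hypothetical `MySpider` module and URLs; the `log_to_file` and `log_dir` keys are the ones introduced above, and `override_settings/0` is Crawly's existing per-spider settings hook (the same callback the new test mocks on `TestSpider`):

```elixir
# A sketch of opting in per spider (MySpider and the URLs are hypothetical).
# Alternatively, set `log_to_file: true` inside the existing
# `config :crawly` block in config/config.exs to enable it globally.
defmodule MySpider do
  use Crawly.Spider

  @impl Crawly.Spider
  def base_url(), do: "https://example.com"

  @impl Crawly.Spider
  def init(), do: [start_urls: ["https://example.com"]]

  # override_settings/0 is consulted by Crawly.Utils.get_settings/3
  # before falling back to the global config
  @impl Crawly.Spider
  def override_settings(), do: [log_to_file: true, log_dir: "/tmp/crawly_logs"]

  @impl Crawly.Spider
  def parse_item(_response), do: %Crawly.ParsedItem{items: [], requests: []}
end
```

Starting it with `Crawly.Engine.start_spider(MySpider)` should then produce a file shaped like `/tmp/crawly_logs/MySpider/<unix_timestamp>_<crawl_id>.log`, matching the `Path.join/1` construction in `configure_spider_logs/2` and reported by the new `Logger.debug("Writing logs to ...")` line.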
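On why each file only receives its own crawl's output: `LoggerFileBackend` supports a `:metadata_filter` option and skips any log event whose metadata does not match, which is what `metadata_filter: [crawl_id: crawl_id]` above keys on; Crawly's spider processes are expected to tag themselves with the matching `crawl_id` metadata. A rough illustration of that mechanism, with a made-up crawl id:

```elixir
require Logger

# LoggerFileBackend only writes events whose metadata matches its
# :metadata_filter, so tagging the current process with the crawl_id
# routes that process's log lines into the per-crawl file configured above.
Logger.metadata(crawl_id: "my-hypothetical-crawl-id")
Logger.debug("this line would land in the per-crawl log file")
```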