From a1ef1a422b3b8972fd3b353d4467f35f05581b86 Mon Sep 17 00:00:00 2001 From: ziinc Date: Tue, 10 Dec 2019 18:23:42 +0800 Subject: [PATCH] added draft logic comments for fetchers implementation --- lib/crawly/worker.ex | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/lib/crawly/worker.ex b/lib/crawly/worker.ex index 4ca8c819..a531405d 100644 --- a/lib/crawly/worker.ex +++ b/lib/crawly/worker.ex @@ -43,16 +43,18 @@ defmodule Crawly.Worker do case :epipe.run(functions, {request, spider_name}) do {:error, _step, reason, _step_state} -> # TODO: Add retry logic - Logger.error( - fn -> - "Crawly worker could not process the request to #{inspect(request.url)} + Logger.error(fn -> + "Crawly worker could not process the request to #{ + inspect(request.url) + } reason: #{inspect(reason)}" - end) + end) + @default_backoff + {:ok, _result} -> @default_backoff end - end Process.send_after(self(), :work, new_backoff) @@ -66,6 +68,9 @@ defmodule Crawly.Worker do response: HTTPoison.Response.t(), result: {:ok, response, spider_name} | {:error, term()} defp get_response({request, spider_name}) do + # check if spider-level fetcher is set. Overrides the globally configured fetcher. + # if not set, log warning for explicit config preferred, get the globally-configured fetcher. Defaults to HTTPoison + case HTTPoison.get(request.url, request.headers, request.options) do {:ok, response} -> {:ok, {response, spider_name}}