From aeaeef74cbddfd5f3a7b4fa1a25a2de645c956ce Mon Sep 17 00:00:00 2001 From: ziinc Date: Mon, 18 May 2020 17:49:05 +0800 Subject: [PATCH 01/10] Added with option, moved all optional parameters to list opt --- lib/crawly.ex | 77 +++++++++++++++++++++++++++++++++++--------- test/crawly_test.exs | 35 ++++++++++++++++++-- 2 files changed, 94 insertions(+), 18 deletions(-) diff --git a/lib/crawly.ex b/lib/crawly.ex index 930de76a..d23f42cc 100644 --- a/lib/crawly.ex +++ b/lib/crawly.ex @@ -9,32 +9,74 @@ defmodule Crawly do The fetched URL is being converted to a request, and the request is piped through the middlewares specidied in a config (with the exception of - `Crawly.Middlewares.DomainFilter`, `Crawly.Middlewares.RobotsTxt` these 2 are - ignored) + `Crawly.Middlewares.DomainFilter`, `Crawly.Middlewares.RobotsTxt`) + + Provide a spider with the `:with` option to fetch a given webpage using that spider. + """ - @spec fetch(url, headers, options) :: HTTPoison.Response.t() + + @spec fetch(url, opts) :: HTTPoison.Response.t() when url: binary(), - headers: [], - options: [] - def fetch(url, headers \\ [], options \\ []) do - request0 = Crawly.Request.new(url, headers, options) + opts: list() + def fetch(url, opts \\ []) do + opts = Enum.into(opts, %{with: nil, request_options: [], headers: []}) + + request0 = + Crawly.Request.new(url, opts[:headers], opts[:request_options]) + |> Map.put( + :middlewares, + Crawly.Utils.get_settings(:middlewares, opts[:with], []) + ) + ignored_middlewares = [ Crawly.Middlewares.DomainFilter, Crawly.Middlewares.RobotsTxt ] - middlewares = request0.middlewares -- ignored_middlewares - {request, _} = Crawly.Utils.pipe(middlewares, request0, %{}) + new_middlewares = request0.middlewares -- ignored_middlewares + + request0 = + Map.put( + request0, + :middlewares, + new_middlewares + ) + + {%{} = request, _} = Crawly.Utils.pipe(request0.middlewares, request0, %{}) - {fetcher, client_options} = Application.get_env( - :crawly, - :fetcher, - {Crawly.Fetchers.HTTPoisonFetcher, []} - ) + {fetcher, client_options} = + Crawly.Utils.get_settings( + :fetcher, + opts[:with], + {Crawly.Fetchers.HTTPoisonFetcher, []} + ) {:ok, response} = fetcher.fetch(request, client_options) - response + + case opts[:with] do + nil -> + # no spider provided, return response as is + response + + _ -> + # spider provided, send response through parse_item callback, pipe through the pipelines + with parsed_result <- parse(response, opts[:with]), + pipelines <- + Crawly.Utils.get_settings( + :pipelines, + opts[:with] + ), + items <- Map.get(parsed_result, :items, []), + pipeline_result <- + Enum.reduce(items, [], fn item, acc -> + {piped, _state} = Crawly.Utils.pipe(pipelines, item, %{}) + + [acc | piped] + end) do + {response, parsed_result, pipeline_result} + end + end end @doc """ @@ -49,13 +91,16 @@ defmodule Crawly do case Kernel.function_exported?(spider, :parse_item, 1) do false -> {:error, :spider_not_found} + true -> spider.parse_item(response) end end @doc """ - Returns a list of known modules which implements Crawly.Spider behaviour + Returns a list of known modules which implements Crawly.Spider behaviour. + + Should not be used for spider management. Use functions defined in `Crawly.Engine` for that. 
""" @spec list_spiders() :: [module()] def list_spiders(), do: Crawly.Utils.list_spiders() diff --git a/test/crawly_test.exs b/test/crawly_test.exs index b9f6b33b..a45a8414 100644 --- a/test/crawly_test.exs +++ b/test/crawly_test.exs @@ -2,7 +2,38 @@ defmodule CrawlyTest do use ExUnit.Case doctest Crawly - test "greets the world" do - assert :test == :test + setup do + :meck.new(CrawlyTestSpider) + + :meck.expect(CrawlyTestSpider, :parse_items, fn resp -> + %{ + items: ["hello"], + requests: [ + Crawly.Utils.request_from_url("https://www.example.com/test") + ] + } + end) + + on_exit(fn -> + :meck.unload(CrawlyTestSpider) + end) + end + + test "fetch/1 is able to fetch a given url using global config, returns a response" do + assert %HTTPoison.Response{} = Crawly.fetch("https://example.com") + end + + test "fetch/2 with :with option provided returns the response, parsed_item result, and processed ParsedItems" do + assert {%HTTPoison.Response{}, parsed_items_res, parsed_items} = + Crawly.fetch("http://example.com", with: CrawlyTestSpider) + + assert %{ + items: items, + requests: requests + } = parsed_items_res + + assert is_list(parsed_items) + assert length(parsed_items) == 1 + assert ["hello"] = parsed_items end end From 7944ec2962c1e0ad5c7565c2dddf0a26b1be3bc6 Mon Sep 17 00:00:00 2001 From: Ziinc Date: Mon, 2 Nov 2020 14:01:02 +0800 Subject: [PATCH 02/10] fixed failing test, added mock for settings override in mocked spider --- lib/crawly.ex | 26 +++++++++++++++++++------- test/crawly_test.exs | 31 +++++++++++++++++++------------ 2 files changed, 38 insertions(+), 19 deletions(-) diff --git a/lib/crawly.ex b/lib/crawly.ex index d23f42cc..d11f4ff3 100644 --- a/lib/crawly.ex +++ b/lib/crawly.ex @@ -15,10 +15,17 @@ defmodule Crawly do """ + @type with_opt :: {:with, nil | module()} + @type request_opt :: {:request_options, list(Crawly.Request.option())} + @type headers_opt :: {:headers, list(Crawly.Request.header())} @spec fetch(url, opts) :: HTTPoison.Response.t() when url: binary(), - opts: list() + opts: [ + with_opt + | request_opt + | headers_opt + ] def fetch(url, opts \\ []) do opts = Enum.into(opts, %{with: nil, request_options: [], headers: []}) @@ -68,13 +75,18 @@ defmodule Crawly do opts[:with] ), items <- Map.get(parsed_result, :items, []), - pipeline_result <- - Enum.reduce(items, [], fn item, acc -> - {piped, _state} = Crawly.Utils.pipe(pipelines, item, %{}) - - [acc | piped] + {pipeline_result, pipeline_state} <- + Enum.reduce(items, {[], %{}}, fn item, {acc, state} -> + {piped, state} = Crawly.Utils.pipe(pipelines, item, state) + + if piped == false do + # dropped + {acc, state} + else + {[piped | acc], state} + end end) do - {response, parsed_result, pipeline_result} + {response, parsed_result, pipeline_result, pipeline_state} end end end diff --git a/test/crawly_test.exs b/test/crawly_test.exs index a45a8414..f6d30d01 100644 --- a/test/crawly_test.exs +++ b/test/crawly_test.exs @@ -3,37 +3,44 @@ defmodule CrawlyTest do doctest Crawly setup do - :meck.new(CrawlyTestSpider) + :meck.new(CrawlyTestSpider, [:non_strict]) - :meck.expect(CrawlyTestSpider, :parse_items, fn resp -> + :meck.expect(CrawlyTestSpider, :parse_item, fn _resp -> %{ - items: ["hello"], + items: [%{content: "hello"}], requests: [ Crawly.Utils.request_from_url("https://www.example.com/test") ] } end) + :meck.expect(CrawlyTestSpider, :override_settings, fn -> + [pipelines: [Crawly.Pipelines.JSONEncoder]] + end) + on_exit(fn -> - :meck.unload(CrawlyTestSpider) + :meck.unload() end) + + {:ok, spider_module: 
CrawlyTestSpider} end test "fetch/1 is able to fetch a given url using global config, returns a response" do assert %HTTPoison.Response{} = Crawly.fetch("https://example.com") end - test "fetch/2 with :with option provided returns the response, parsed_item result, and processed ParsedItems" do - assert {%HTTPoison.Response{}, parsed_items_res, parsed_items} = - Crawly.fetch("http://example.com", with: CrawlyTestSpider) + test "fetch/2 with :with option provided returns the response, parsed_item result, and processed ParsedItems", + %{spider_module: spider_module} do + assert {%HTTPoison.Response{}, parsed_item_res, parsed_items, + pipeline_state} = + Crawly.fetch("http://example.com", with: spider_module) assert %{ - items: items, + items: [_], requests: requests - } = parsed_items_res + } = parsed_item_res - assert is_list(parsed_items) - assert length(parsed_items) == 1 - assert ["hello"] = parsed_items + assert [encoded] = parsed_items + assert encoded =~ "hello" end end From beebfeea677b6024f89a62a7acccadb1d1d7da90 Mon Sep 17 00:00:00 2001 From: Ziinc Date: Mon, 2 Nov 2020 14:09:06 +0800 Subject: [PATCH 03/10] added typespecs and docs --- lib/crawly.ex | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/lib/crawly.ex b/lib/crawly.ex index d11f4ff3..7eff4973 100644 --- a/lib/crawly.ex +++ b/lib/crawly.ex @@ -13,13 +13,31 @@ defmodule Crawly do Provide a spider with the `:with` option to fetch a given webpage using that spider. + ### Fetching with a spider + To fetch a response from a url with a spider, define your spider, and pass the module name to the `:with` option. + iex> Crawly.fetch("https://www.example.com", with: MySpider) + {%HTTPoison.Response{...}, %{...}, [...], %{...}} + + Using the `:with` option will return a 4 item tuple: + + 1. The HTTPoison response + 2. The result returned from the `parse_item/1` callback + 3. The list of items that have been processed by the declared item pipelines. + 4. The pipeline state, included for debugging purposes. """ @type with_opt :: {:with, nil | module()} @type request_opt :: {:request_options, list(Crawly.Request.option())} @type headers_opt :: {:headers, list(Crawly.Request.header())} - @spec fetch(url, opts) :: HTTPoison.Response.t() + @type parsed_item_result :: Crawly.ParsedItem.t() + @type parsed_items :: list(any()) + @type pipeline_state :: %{optional(atom()) => any()} + + @spec fetch(url, opts) :: + HTTPoison.Response.t() + | {HTTPoison.Response.t(), parsed_item_result, parsed_items, + pipeline_state} when url: binary(), opts: [ with_opt From a3af9b8615ed8fb19a183c8b7165ae4f70029a30 Mon Sep 17 00:00:00 2001 From: Ziinc Date: Mon, 2 Nov 2020 14:09:41 +0800 Subject: [PATCH 04/10] corrected typo in docs --- lib/crawly.ex | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/crawly.ex b/lib/crawly.ex index 7eff4973..fae97481 100644 --- a/lib/crawly.ex +++ b/lib/crawly.ex @@ -8,7 +8,7 @@ defmodule Crawly do when you need to get individual pages and parse them. The fetched URL is being converted to a request, and the request is piped - through the middlewares specidied in a config (with the exception of + through the middlewares specified in a config (with the exception of `Crawly.Middlewares.DomainFilter`, `Crawly.Middlewares.RobotsTxt`) Provide a spider with the `:with` option to fetch a given webpage using that spider. 
From 675e1c42d61890ac637e23427688b2ed085c8cbc Mon Sep 17 00:00:00 2001
From: Ziinc
Date: Wed, 11 Nov 2020 23:11:16 +0800
Subject: [PATCH 05/10] remove doctest execution for Crawly module

---
 test/crawly_test.exs | 1 -
 1 file changed, 1 deletion(-)

diff --git a/test/crawly_test.exs b/test/crawly_test.exs
index f6d30d01..7dbd86db 100644
--- a/test/crawly_test.exs
+++ b/test/crawly_test.exs
@@ -1,6 +1,5 @@
 defmodule CrawlyTest do
   use ExUnit.Case
-  doctest Crawly
 
   setup do
     :meck.new(CrawlyTestSpider, [:non_strict])
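
Usage overview for the series: the sketch below shows how the new `:with` option introduced by these patches is intended to be used end to end. The `MySpider` module is hypothetical — it simply mirrors the mocked `CrawlyTestSpider` from the test suite (one parsed item, one follow-up request, and a `Crawly.Pipelines.JSONEncoder` override) — and the 4-element return shape follows the docs added in PATCH 03.

    defmodule MySpider do
      use Crawly.Spider

      def base_url(), do: "https://www.example.com"

      def init(), do: [start_urls: ["https://www.example.com"]]

      def parse_item(_response) do
        # Return a Crawly.ParsedItem-shaped map, as the mocked spider does.
        %{
          items: [%{content: "hello"}],
          requests: [Crawly.Utils.request_from_url("https://www.example.com/test")]
        }
      end

      # Spider-level settings override, picked up via Crawly.Utils.get_settings/3.
      def override_settings(), do: [pipelines: [Crawly.Pipelines.JSONEncoder]]
    end

    # Without :with, fetch/2 still returns only the HTTPoison response:
    %HTTPoison.Response{} = Crawly.fetch("https://www.example.com")

    # With :with, the response is parsed by parse_item/1 and each item is
    # piped through the spider's pipelines, yielding the 4-element tuple:
    {response, parsed_item_result, pipeline_result, pipeline_state} =
      Crawly.fetch("https://www.example.com", with: MySpider)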