diff --git a/lib/crawly/middlewares/domain_filter.ex b/lib/crawly/middlewares/domain_filter.ex index 05238f11..9454615a 100644 --- a/lib/crawly/middlewares/domain_filter.ex +++ b/lib/crawly/middlewares/domain_filter.ex @@ -1,12 +1,23 @@ defmodule Crawly.Middlewares.DomainFilter do @moduledoc """ - Filters out requests which are going outside of the crawled domain + Filters out requests which are going outside of the crawled domain. + + The domain that is used to compare against the request url is obtained from the spider's `c:Crawly.Spider.base_url/0` callback. + + Does not accept any options. Tuple-based configuration options will be ignored. + + ### Example Declaration + ``` + middlewares: [ + Crawly.Middlewares.DomainFilter + ] + ``` """ @behaviour Crawly.Pipeline require Logger - def run(request, state) do + def run(request, state, _opts \\ []) do base_url = state.spider_name.base_url() case String.contains?(request.url, base_url) do