Skip to content

Commit

Permalink
Merge pull request #57 from oltarasenko/fetch
Browse files Browse the repository at this point in the history
Add new fetch and parse functions
  • Loading branch information
oltarasenko committed Feb 17, 2020
2 parents 2d530e0 + 0ac9c7f commit 9ba0d67
Showing 1 changed file with 34 additions and 3 deletions.
37 changes: 34 additions & 3 deletions lib/crawly.ex
Original file line number Diff line number Diff line change
Expand Up @@ -5,21 +5,52 @@ defmodule Crawly do

@doc """
Fetches a given url. This function is mainly used for the spiders development
when you need to get individual pages and parse them.

The fetched URL is converted to a request, and the request is piped through
the middlewares specified in the config (with the exception of
`Crawly.Middlewares.DomainFilter` and `Crawly.Middlewares.RobotsTxt`, which
are ignored).

The request is then executed by the fetcher configured under the `:fetcher`
key of the `:crawly` application environment, given as a
`{fetcher_module, client_options}` tuple. It defaults to
`{Crawly.Fetchers.HTTPoisonFetcher, []}`.

Returns the response on success. Raises a `MatchError` when the fetcher
returns anything other than `{:ok, response}`.
"""
@spec fetch(url, headers, options) :: HTTPoison.Response.t()
      when url: binary(),
           headers: list(),
           options: list()
def fetch(url, headers \\ [], options \\ []) do
  request0 = Crawly.Request.new(url, headers, options)

  # These middlewares are skipped for ad-hoc fetches (see the @doc above).
  ignored_middlewares = [
    Crawly.Middlewares.DomainFilter,
    Crawly.Middlewares.RobotsTxt
  ]

  middlewares = request0.middlewares -- ignored_middlewares

  # Only the processed request is needed; the resulting state is discarded.
  {request, _} = Crawly.Utils.pipe(middlewares, request0, %{})

  {fetcher, client_options} =
    Application.get_env(
      :crawly,
      :fetcher,
      {Crawly.Fetchers.HTTPoisonFetcher, []}
    )

  # Perform the fetch exactly once and unwrap the successful response.
  {:ok, response} = fetcher.fetch(request, client_options)
  response
end

@doc """
Parses a given response with a given spider. Allows to quickly see the outcome
of the given `parse_item/1` implementation.

Returns whatever `spider.parse_item(response)` returns, or
`{:error, :spider_not_found}` when `spider` is not a loadable module that
exports `parse_item/1`.
"""
@spec parse(response, spider) :: result | {:error, :spider_not_found}
      when response: Crawly.Response.t(),
           spider: atom(),
           result: Crawly.ParsedItem.t()
def parse(response, spider) do
  # `function_exported?/3` does not load the module, so make sure the spider
  # is loaded first; otherwise a valid but not-yet-loaded spider would be
  # reported as missing.
  if Code.ensure_loaded?(spider) and function_exported?(spider, :parse_item, 1) do
    spider.parse_item(response)
  else
    {:error, :spider_not_found}
  end
end
end

0 comments on commit 9ba0d67

Please sign in to comment.