Skip to content

Commit

Permalink
doc: Update hook/event functions
Browse files Browse the repository at this point in the history
[ci skip]
  • Loading branch information
chfoo committed Jun 10, 2016
1 parent b97409e commit 9eed1a4
Show file tree
Hide file tree
Showing 4 changed files with 19 additions and 51 deletions.
18 changes: 4 additions & 14 deletions wpull/database/wrap.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
from wpull.application.plugin import event_interface, PluginFunctions
from wpull.database.base import BaseURLTable
from wpull.application.hook import HookableMixin, HookDisconnected
from wpull.pipeline.item import Status
from wpull.url import parse_url_or_log
from wpull.pipeline.item import Status, URLRecord
from wpull.url import parse_url_or_log, URLInfo
import wpull.application.hook


Expand Down Expand Up @@ -91,24 +91,14 @@ def get_hostnames(self):

@staticmethod
@event_interface(PluginFunctions.queued_url)
def queued_url(url_info):
def queued_url(url_info: URLInfo):
'''Callback fired after a URL was put into the queue.
Args:
url_info (dict): A mapping containing the same information in
:class:`.url.URLInfo`.
'''

@staticmethod
@event_interface(PluginFunctions.dequeued_url)
def dequeued_url(url_info, record_info):
def dequeued_url(url_info: URLInfo, record_info: URLRecord):
'''Callback fired after a URL was retrieved from the queue.
Args:
url_info (dict): A mapping containing the same information in
:class:`.url.URLInfo`.
record_info (dict): A mapping containing the same information in
:class:`.item.URLRecord`.
'''

def get_root_url_todo_count(self):
Expand Down
2 changes: 1 addition & 1 deletion wpull/network/dns.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,4 +376,4 @@ def resolve_dns(host: str) -> str:
@staticmethod
@event_interface(PluginFunctions.resolve_dns_result)
def resolve_dns_result(host: str, result: ResolveResult):
pass
'''Callback when a DNS resolution has been made.'''
22 changes: 11 additions & 11 deletions wpull/pipeline/session.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,23 +115,23 @@ def add_child_url(self, url: str, inline: bool=False,
'''Add links scraped from the document with automatic values.
Args:
urls: An iterable of `str` or `dict`. When a `str` is provided,
it is a URL. When a `dict` is provided, it is a mapping
of table column names to values.
inline: Whether the URL is an embedded object. This
function automatically calculates the value needed for
the table column "inline".
kwargs: Additional column value to be applied for all URLs
provided.
url: A full URL. (It can't be a relative path.)
inline: Whether the URL is an embedded object.
link_type: Expected link type.
post_data: URL encoded form data. The request will be made using
POST. (Don't use this to upload files.)
level: The child depth of this URL.
replace: Whether to replace the existing entry in the database
table so that it will be downloaded again.
This function provides values automatically for:
* ``inline``
* ``level``
* ``referrer``
* ``top_url``
* ``parent``: The referring page.
* ``root``
See also :meth:`.database.base.BaseSQLURLTable.add_many`.
See also :meth:`add_url`.
'''
url_properties = URLProperties()
url_properties.level = self.url_record.level + 1 if level is None else level
Expand Down
28 changes: 3 additions & 25 deletions wpull/processor/rule.py
Original file line number Diff line number Diff line change
Expand Up @@ -544,33 +544,11 @@ def scrape_document(self, item_session: ItemSession):
@staticmethod
@event_interface(PluginFunctions.get_urls)
def plugin_get_urls(item_session: ItemSession):
'''Return additional URLs to be added to the URL Table.
'''Add additional URLs to the URL Table.
Args:
item_session:
.. Note:: The URLs provided do not replace entries in the URL Table.
If a URL already exists in the URL Table, it will be ignored
as defined in :class:`.database.URLTable`. As well, the URLs
added do not reset the item Status to ``todo``. To override
this behavior, see ``replace`` as described below.
Returns:
list: A ``list`` of ``dict``. Each ``dict`` contains:
* ``url``: a string of the URL
* ``link_type`` (str, optional): A string defined in
:class:`.item.LinkType`.
* ``inline`` (bool, optional): If True, the link is an
embedded HTML object.
* ``post_data`` (str, optional): If provided, the
request will be a POST request with a
``application/x-www-form-urlencoded`` content type.
* ``replace`` (bool, optional): If True and if the URL already
exists in the URL Table, the entry is deleted and replaced
with a new one.
When this event is dispatched, the caller should add any URLs needed
using :meth:`.ItemSession.add_child_url`.
'''
return

def _process_scrape_info(self, scraper: BaseScraper,
scrape_result: ScrapeResult,
Expand Down

0 comments on commit 9eed1a4

Please sign in to comment.