Skip to content

Commit

Permalink
Add additional type hints to the UrlHasher class
Browse files Browse the repository at this point in the history
During review, type hints were mentioned, which prompted me to take a second
look to see whether I had missed any that could easily be added. This was
especially important for some of the newly added instance variables, as they
are expected to be modified if desired.
  • Loading branch information
mcdonnnj committed Feb 16, 2021
1 parent 3499d5c commit 2c51f2b
Showing 1 changed file with 13 additions and 8 deletions.
21 changes: 13 additions & 8 deletions src/hash_http_content/hasher.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,20 +94,23 @@ def __init__(
logging.debug("Default browser options: %s", default_browser_options)

# Number of retries
self._retries = 3
self._retries: int = 3
logging.debug("Using retry value of '%d'", self._retries)

# Timeout in seconds
self._timeout = 5
self._timeout: int = 5
logging.debug("Using request timeout limit of '%d' seconds", self._timeout)

self.__browser_options = {**default_browser_options, **browser_options}
self.__browser_options: Dict[str, Any] = {
**default_browser_options,
**browser_options,
}
logging.debug("Using browser options: %s", self.__browser_options)

self._browser: Browser = None
self._browser_page: Page = None
self._default_encoding = encoding
self._hash_algorithm = hash_algorithm
self._default_encoding: str = encoding
self._hash_algorithm: str = hash_algorithm

logging.debug("Using default encoding '%s'", self._default_encoding)
logging.debug("Using hashing algorithm '%s'", self._hash_algorithm)
Expand Down Expand Up @@ -223,8 +226,10 @@ def _handle_html(self, contents: bytes, encoding: str) -> HandlerResult:
soup: BeautifulSoup = BeautifulSoup(page_contents, "lxml")
text_elements = soup.find_all(text=True)
visible_text_elements = filter(self._is_visible_element, text_elements)
visible_text = " ".join(t.strip() for t in visible_text_elements if t.strip())
visible_bytes = bytes(visible_text, self._default_encoding)
visible_text: str = " ".join(
t.strip() for t in visible_text_elements if t.strip()
)
visible_bytes: bytes = bytes(visible_text, self._default_encoding)

digest: str = get_hash_digest(self._hash_algorithm, visible_bytes)

Expand Down Expand Up @@ -267,7 +272,7 @@ def hash_url(self, url: str, verify: Union[bool, str] = True) -> UrlResult:
raise err

# https://tools.ietf.org/html/rfc7231#section-3.1.1.5
content_type = (
content_type: str = (
resp.headers.get("content-type", "application/octet-stream").strip().lower()
)

Expand Down

0 comments on commit 2c51f2b

Please sign in to comment.