Skip to content

Commit

Permalink
Add static typing (#229)
Browse files Browse the repository at this point in the history
  • Loading branch information
facelessuser committed Oct 26, 2021
1 parent 2abff64 commit 9fd8c41
Show file tree
Hide file tree
Showing 21 changed files with 627 additions and 367 deletions.
2 changes: 1 addition & 1 deletion .pyspelling.yml
Expand Up @@ -71,7 +71,7 @@ matrix:
context_visible_first: true
delimiters:
# Ignore lint (noqa), coverage (pragma), and type comments (type:) as well as shebang (#!)
- open: '^(?: *(?:noqa\b|pragma: no cover)|!)'
- open: '^(?: *(?:noqa\b|pragma: no cover|type: .*?)|!)'
close: '$'
# Ignore Python encoding string -*- encoding stuff -*-
- open: '^ *-\*-'
Expand Down
2 changes: 1 addition & 1 deletion MANIFEST.in
@@ -1,4 +1,4 @@
recursive-include soupsieve *.py
recursive-include soupsieve *.py py.typed
recursive-include tests *.py
recursive-include docs/src/markdown *.md *.png *.gif *.html
recursive-include docs/src/dictionary *.txt
Expand Down
1 change: 1 addition & 0 deletions docs/src/markdown/_snippets/selector_styles.txt
@@ -1,6 +1,7 @@
<style>
.twemoji.big-icon svg {
max-height: initial !important;
height: 3rem !important;
width: 3rem !important;
}

Expand Down
13 changes: 7 additions & 6 deletions docs/src/markdown/about/changelog.md
Expand Up @@ -2,13 +2,14 @@

## 2.3

- **NEW**: `:has()`, `:is()`, and `:where()` now use use a forgiving selector list. While not as forgiving as due to
syntax errors as CSS might be, it will forgive such things as empty sets and empty slots due to multiple consecutive
commas, leading commas, or trailing commas. Essentially, these pseudo-classes will match all non-empty selectors and
ignore empty ones. As the scraping environment is different that a browser environment, it was chosen not to
aggressively forgive bad syntax and invalid features to ensure the user is alerted that their program may not perform
as expected.
- **NEW**: Add static typing.
- **NEW**: `:has()`, `:is()`, and `:where()` now use a forgiving selector list. While not as forgiving as CSS might
be, it will forgive such things as empty sets and empty slots due to multiple consecutive commas, leading commas, or
trailing commas. Essentially, these pseudo-classes will match all non-empty selectors and ignore empty ones. As the
scraping environment is different than a browser environment, it was chosen not to aggressively forgive bad syntax and
invalid features to ensure the user is alerted that their program may not perform as expected.
- **NEW**: Add support to output a pretty print format of a compiled `SelectorList` for debug purposes.
- **FIX**: Some small corner cases discovered with static typing.

## 2.2.1

Expand Down
32 changes: 16 additions & 16 deletions docs/src/markdown/selectors/pseudo-classes.md
Expand Up @@ -255,22 +255,22 @@ Selects elements that have no children and no text (whitespace is ignored).
```

=== "Usage"
```pycon3
>>> from bs4 import BeautifulSoup as bs
>>> html = """
... <html>
... <head></head>
... <body>
... <span> <!-- comment --> </span>
... <span></span>
... <span><span> </span></span>
... </body>
... </html>
... """
>>> soup = bs(html, 'html5lib')
>>> print(soup.select('body :empty'))
[<span> <!-- comment --> </span>, <span></span>, <span> </span>]
```
```pycon3
>>> from bs4 import BeautifulSoup as bs
>>> html = """
... <html>
... <head></head>
... <body>
... <span> <!-- comment --> </span>
... <span></span>
... <span><span> </span></span>
... </body>
... </html>
... """
>>> soup = bs(html, 'html5lib')
>>> print(soup.select('body :empty'))
[<span> <!-- comment --> </span>, <span></span>, <span> </span>]
```

!!! tip "Additional Reading"
https://developer.mozilla.org/en-US/docs/Web/CSS/:empty
Expand Down
14 changes: 7 additions & 7 deletions mkdocs.yml
Expand Up @@ -22,6 +22,9 @@ theme:
features:
- navigation.tabs
- navigation.top
- navigation.instant
pymdownx:
sponsor: "https://github.com/sponsors/facelessuser"

nav:
- Home:
Expand All @@ -43,7 +46,7 @@ nav:

markdown_extensions:
- markdown.extensions.toc:
slugify: !!python/name:pymdownx.slugs.uslugify
slugify: !!python/object/apply:pymdownx.slugs.slugify {kwds: {case: lower}}
permalink: ""
- markdown.extensions.admonition:
- markdown.extensions.smarty:
Expand All @@ -57,10 +60,6 @@ markdown_extensions:
- pymdownx.superfences:
- pymdownx.highlight:
extend_pygments_lang:
- name: php-inline
lang: php
options:
startinline: true
- name: pycon3
lang: pycon
options:
Expand Down Expand Up @@ -92,6 +91,7 @@ markdown_extensions:
separator: "\uff0b"
- pymdownx.details:
- pymdownx.tabbed:
alternate_style: true
- pymdownx.saneheaders:

extra:
Expand All @@ -105,6 +105,6 @@ plugins:
- search:
separator: '[:\s\-]+'
- git-revision-date-localized
# - minify:
# minify_html: true
- minify:
minify_html: true
- mkdocs_pymdownx_material_extras
3 changes: 2 additions & 1 deletion requirements/docs.txt
@@ -1,3 +1,4 @@
mkdocs_pymdownx_material_extras==1.2.2
mkdocs_pymdownx_material_extras==1.5.4
mkdocs-git-revision-date-localized-plugin
mkdocs-minify-plugin
pyspelling
1 change: 1 addition & 0 deletions requirements/tests.txt
Expand Up @@ -4,3 +4,4 @@ coverage
lxml
html5lib
beautifulsoup4
mypy
4 changes: 3 additions & 1 deletion setup.py
Expand Up @@ -51,6 +51,7 @@ def get_description():
author_email='Isaac.Muse@gmail.com',
url='https://github.com/facelessuser/soupsieve',
packages=find_packages(exclude=['test*', 'tools']),
package_data={"soupsieve": ["py.typed"]},
install_requires=get_requirements("requirements/project.txt"),
license='MIT License',
classifiers=[
Expand All @@ -65,6 +66,7 @@ def get_description():
'Programming Language :: Python :: 3.8',
'Programming Language :: Python :: 3.9',
'Topic :: Internet :: WWW/HTTP :: Dynamic Content',
'Topic :: Software Development :: Libraries :: Python Modules'
'Topic :: Software Development :: Libraries :: Python Modules',
'Typing :: Typed'
]
)
87 changes: 71 additions & 16 deletions soupsieve/__init__.py
Expand Up @@ -30,6 +30,8 @@
from . import css_match as cm
from . import css_types as ct
from .util import DEBUG, SelectorSyntaxError # noqa: F401
import bs4 # type: ignore[import]
from typing import Dict, Optional, Any, List, Iterator, Iterable

__all__ = (
'DEBUG', 'SelectorSyntaxError', 'SoupSieve',
Expand All @@ -40,15 +42,18 @@
SoupSieve = cm.SoupSieve


def compile(pattern, namespaces=None, flags=0, **kwargs): # noqa: A001
def compile( # noqa: A001
pattern: str,
namespaces: Optional[Dict[str, str]] = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
**kwargs: Any
) -> cm.SoupSieve:
"""Compile CSS pattern."""

if namespaces is not None:
namespaces = ct.Namespaces(namespaces)

custom = kwargs.get('custom')
if custom is not None:
custom = ct.CustomSelectors(custom)
ns = ct.Namespaces(namespaces) if namespaces is not None else namespaces # type: Optional[ct.Namespaces]
cs = ct.CustomSelectors(custom) if custom is not None else custom # type: Optional[ct.CustomSelectors]

if isinstance(pattern, SoupSieve):
if flags:
Expand All @@ -59,53 +64,103 @@ def compile(pattern, namespaces=None, flags=0, **kwargs): # noqa: A001
raise ValueError("Cannot process 'custom' argument on a compiled selector list")
return pattern

return cp._cached_css_compile(pattern, namespaces, custom, flags)
return cp._cached_css_compile(pattern, ns, cs, flags)


def purge():
def purge() -> None:
"""Purge cached patterns."""

cp._purge_cache()


def closest(select, tag, namespaces=None, flags=0, **kwargs):
def closest(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
**kwargs: Any
) -> 'bs4.Tag':
"""Match closest ancestor."""

return compile(select, namespaces, flags, **kwargs).closest(tag)


def match(select, tag, namespaces=None, flags=0, **kwargs):
def match(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
**kwargs: Any
) -> bool:
"""Match node."""

return compile(select, namespaces, flags, **kwargs).match(tag)


def filter(select, iterable, namespaces=None, flags=0, **kwargs): # noqa: A001
def filter( # noqa: A001
select: str,
iterable: Iterable['bs4.Tag'],
namespaces: Optional[Dict[str, str]] = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
**kwargs: Any
) -> List['bs4.Tag']:
"""Filter list of nodes."""

return compile(select, namespaces, flags, **kwargs).filter(iterable)


def select_one(select, tag, namespaces=None, flags=0, **kwargs):
def select_one(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
**kwargs: Any
) -> 'bs4.Tag':
"""Select a single tag."""

return compile(select, namespaces, flags, **kwargs).select_one(tag)


def select(select, tag, namespaces=None, limit=0, flags=0, **kwargs):
def select(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
**kwargs: Any
) -> List['bs4.Tag']:
"""Select the specified tags."""

return compile(select, namespaces, flags, **kwargs).select(tag, limit)


def iselect(select, tag, namespaces=None, limit=0, flags=0, **kwargs):
def iselect(
select: str,
tag: 'bs4.Tag',
namespaces: Optional[Dict[str, str]] = None,
limit: int = 0,
flags: int = 0,
*,
custom: Optional[Dict[str, str]] = None,
**kwargs: Any
) -> Iterator['bs4.Tag']:
"""Iterate the specified tags."""

for el in compile(select, namespaces, flags, **kwargs).iselect(tag, limit):
yield el


def escape(ident):
def escape(ident: str) -> str:
"""Escape identifier."""

return cp.escape(ident)
22 changes: 13 additions & 9 deletions soupsieve/__meta__.py
Expand Up @@ -79,7 +79,11 @@ class Version(namedtuple("Version", ["major", "minor", "micro", "release", "pre"
"""

def __new__(cls, major, minor, micro, release="final", pre=0, post=0, dev=0):
def __new__(
cls,
major: int, minor: int, micro: int, release: str = "final",
pre: int = 0, post: int = 0, dev: int = 0
) -> "Version":
"""Validate version info."""

# Ensure all parts are positive integers.
Expand Down Expand Up @@ -115,27 +119,27 @@ def __new__(cls, major, minor, micro, release="final", pre=0, post=0, dev=0):

return super(Version, cls).__new__(cls, major, minor, micro, release, pre, post, dev)

def _is_pre(self):
def _is_pre(self) -> bool:
"""Is prerelease."""

return self.pre > 0
return bool(self.pre > 0)

def _is_dev(self):
def _is_dev(self) -> bool:
"""Is development."""

return bool(self.release < "alpha")

def _is_post(self):
def _is_post(self) -> bool:
"""Is post."""

return self.post > 0
return bool(self.post > 0)

def _get_dev_status(self): # pragma: no cover
def _get_dev_status(self) -> str: # pragma: no cover
"""Get development status string."""

return DEV_STATUS[self.release]

def _get_canonical(self):
def _get_canonical(self) -> str:
"""Get the canonical output string."""

# Assemble major, minor, micro version and append `pre`, `post`, or `dev` if needed.
Expand All @@ -153,7 +157,7 @@ def _get_canonical(self):
return ver


def parse_version(ver):
def parse_version(ver: str) -> Version:
"""Parse version into a comparable Version tuple."""

m = RE_VER.match(ver)
Expand Down

0 comments on commit 9fd8c41

Please sign in to comment.