Skip to content

Commit

Permalink
Add pretty print debug (#227)
Browse files Browse the repository at this point in the history
  • Loading branch information
facelessuser committed Sep 10, 2021
1 parent 30b6089 commit 2abff64
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 1 deletion.
1 change: 1 addition & 0 deletions docs/src/markdown/about/changelog.md
Expand Up @@ -8,6 +8,7 @@
ignore empty ones. As the scraping environment is different than a browser environment, it was chosen not to
aggressively forgive bad syntax and invalid features to ensure the user is alerted that their program may not perform
as expected.
- **NEW**: Add support to output a pretty print format of a compiled `SelectorList` for debug purposes.

## 2.2.1

Expand Down
57 changes: 57 additions & 0 deletions docs/src/markdown/about/development.md
Expand Up @@ -193,6 +193,63 @@ object that may chain other `SelectorLists` objects depending on the complexity
a selector list, then you will get multiple `Selector` objects (one for each compound selector in the list) which in
turn may chain other `Selector` objects.

To view the selector list in a compiled object for debugging purposes, one can access it via `SoupSieve.selectors`,
though it is recommended to pretty print them:

```pycon3
>>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='that',
prefix=None),
ids=(),
classes=(
'class',
),
attributes=(
SelectorAttribute(
attribute='name',
prefix='',
pattern=re.compile(
'^value$'),
xml_type_pattern=None),
),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='this',
prefix=None),
ids=(),
classes=(),
attributes=(),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(),
is_not=False,
is_html=False),
rel_type='>',
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False),
rel_type=None,
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False)
```

### `SelectorList`

```py3
Expand Down
8 changes: 7 additions & 1 deletion soupsieve/css_types.py
@@ -1,6 +1,7 @@
"""CSS selector structure items."""
import copyreg
from collections.abc import Hashable, Mapping
from .pretty import pretty

__all__ = (
'Selector',
Expand Down Expand Up @@ -80,11 +81,16 @@ def __repr__(self): # pragma: no cover
"""Representation."""

return "{}({})".format(
self.__base__(), ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
self.__class__.__name__, ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]])
)

__str__ = __repr__

def pretty(self):  # pragma: no cover
    """
    Pretty print.

    Passes this object to the module-level `pretty` formatter and prints
    the resulting string; nothing is returned. Intended for debugging.
    """

    print(pretty(self))


class ImmutableDict(Mapping):
"""Hashable, immutable dictionary."""
Expand Down
136 changes: 136 additions & 0 deletions soupsieve/pretty.py
@@ -0,0 +1,136 @@
"""
Format a pretty string of a `SoupSieve` object for easy debugging.
This won't necessarily support all types and such, and it definitely
won't support custom outputs.
It is mainly geared towards our types as the `SelectorList`
object is a beast to look at without some indentation and newlines.
The format and the various output types are fairly well known (though this
hasn't been tested extensively to make sure we aren't missing corner cases).
Example:
```
>>> import soupsieve as sv
>>> sv.compile('this > that.class[name=value]').selectors.pretty()
SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='that',
prefix=None),
ids=(),
classes=(
'class',
),
attributes=(
SelectorAttribute(
attribute='name',
prefix='',
pattern=re.compile(
'^value$'),
xml_type_pattern=None),
),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(
Selector(
tag=SelectorTag(
name='this',
prefix=None),
ids=(),
classes=(),
attributes=(),
nth=(),
selectors=(),
relation=SelectorList(
selectors=(),
is_not=False,
is_html=False),
rel_type='>',
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False),
rel_type=None,
contains=(),
lang=(),
flags=0),
),
is_not=False,
is_html=False)
```
"""
import re

# Identifier-like tokens. Each requires at least two characters before any
# trailing `(` or `=`.
RE_CLASS = re.compile(r'(?i)[a-z_][_a-z\d\.]+\(')
RE_PARAM = re.compile(r'(?i)[_a-z][_a-z\d]+=')
RE_KWORD = re.compile(r'(?i)[_a-z][_a-z\d]+')

# Literals.
RE_INT = re.compile(r'\d+')
RE_SQSTR = re.compile(r"'(?:\\.|[^'\\])*'")
RE_DQSTR = re.compile(r'"(?:\\.|[^"\\])*"')

# Containers: an empty pair, then the individual opening and closing brackets.
RE_EMPTY = re.compile(r'\(\)|\[\]|\{\}')
RE_LSTRT = re.compile(r'\[')
RE_DSTRT = re.compile(r'\{')
RE_TSTRT = re.compile(r'\(')
RE_LEND = re.compile(r'\]')
RE_DEND = re.compile(r'\}')
RE_TEND = re.compile(r'\)')

# Separators; surrounding whitespace is consumed, only the symbol is captured.
RE_SEP = re.compile(r'\s*(,)\s*')
RE_DSEP = re.compile(r'\s*(:)\s*')

# Token name -> pattern. Insertion order is the order in which the patterns
# are tried, so the more specific tokens must come first: 'class' and 'param'
# before 'kword', and 'empty' before the single opening brackets.
TOKENS = dict(
    (
        ('class', RE_CLASS),
        ('param', RE_PARAM),
        ('empty', RE_EMPTY),
        ('lstrt', RE_LSTRT),
        ('dstrt', RE_DSTRT),
        ('tstrt', RE_TSTRT),
        ('lend', RE_LEND),
        ('dend', RE_DEND),
        ('tend', RE_TEND),
        ('sqstr', RE_SQSTR),
        ('sep', RE_SEP),
        ('dsep', RE_DSEP),
        ('int', RE_INT),
        ('kword', RE_KWORD),
        ('dqstr', RE_DQSTR),
    )
)


def pretty(obj):  # pragma: no cover
    """
    Make the object output string pretty.

    `obj` is converted with `str()` and the text is scanned left to right.
    At each position the patterns in `TOKENS` are tried in order and the
    first one that matches is consumed:

    - 'class', 'lstrt', 'dstrt', 'tstrt': emitted, then a newline and one
      more indentation level (4 spaces).
    - 'lend', 'dend', 'tend': indentation drops one level, then emitted.
    - 'sep': the comma is emitted, then a newline at the current indentation.
    - 'dsep': the colon is emitted followed by a single space.
    - 'param', 'int', 'kword', 'sqstr', 'dqstr', 'empty': emitted unchanged.

    A character that no token matches is copied to the output unchanged.

    Returns the reformatted string.
    """

    sel = str(obj)
    length = len(sel)
    index = 0
    indent = 0
    output = []

    while index < length:
        for name, pattern in TOKENS.items():
            m = pattern.match(sel, index)
            if m is None:
                continue

            index = m.end(0)
            if name in ('class', 'lstrt', 'dstrt', 'tstrt'):
                indent += 4
                output.append('{}\n{}'.format(m.group(0), " " * indent))
            elif name in ('param', 'int', 'kword', 'sqstr', 'dqstr', 'empty'):
                output.append(m.group(0))
            elif name in ('lend', 'dend', 'tend'):
                indent -= 4
                output.append(m.group(0))
            elif name in ('sep',):
                output.append('{}\n{}'.format(m.group(1), " " * indent))
            elif name in ('dsep',):
                output.append('{} '.format(m.group(1)))
            break
        else:
            # Nothing matched here (e.g. the `.` or `|` in the flags of
            # `re.compile('x', re.IGNORECASE)`, a `-` sign, or a one-letter
            # name). Copy the character through and advance; otherwise
            # `index` would never move and this loop would never terminate.
            output.append(sel[index])
            index += 1

    return ''.join(output)

0 comments on commit 2abff64

Please sign in to comment.