diff --git a/docs/src/dictionary/en-custom.txt b/docs/src/dictionary/en-custom.txt index e6b244f9..48ce11fe 100644 --- a/docs/src/dictionary/en-custom.txt +++ b/docs/src/dictionary/en-custom.txt @@ -2,6 +2,7 @@ API Accessors Aspell BeautifulSoup +CDATA CSS CSS's Changelog diff --git a/docs/src/markdown/about/changelog.md b/docs/src/markdown/about/changelog.md index 6e371780..e70e32ad 100644 --- a/docs/src/markdown/about/changelog.md +++ b/docs/src/markdown/about/changelog.md @@ -1,5 +1,12 @@ # Changelog +## 1.0.0b2 + +- **NEW**: Drop document flags. Document type can be detected from the Beautiful Soup object directly. +- **FIX**: CSS selectors should be evaluated with CSS whitespace rules. +- **FIX**: Processing instructions, CDATA, and declarations should all be ignored in `:contains` and child considerations for `:empty`. +- **FIX**: In Beautiful Soup, the document itself is the first tag. Do not match the "document" tag by returning false for any tag that doesn't have a parent. + ## 1.0.0b1 - **NEW**: Add support for non-standard `:contains()` selector. diff --git a/docs/src/markdown/about/development.md b/docs/src/markdown/about/development.md index 2c54d4eb..37abd2eb 100644 --- a/docs/src/markdown/about/development.md +++ b/docs/src/markdown/about/development.md @@ -220,7 +220,7 @@ class SelectorTag: class SelectorAttribute: """Selector attribute rule.""" - def __init__(self, attribute, prefix, pattern): + def __init__(self, attribute, prefix, pattern, xml_type_pattern): """Initialize.""" ``` @@ -229,6 +229,7 @@ class SelectorAttribute: `attribute` | Contains the attribute name to match. `prefix` | Contains the attribute namespace prefix to match if any. `pattern` | Contains a `re` regular expression object that matches the desired attribute value. +`xml_type_pattern` | As the default `type` pattern is case insensitive, when the attribute value is `type` and a case sensitivity has not been explicitly defined, a secondary case sensitive `type` pattern is compiled for use with XML documents when detected. ### `SelectorNth` diff --git a/docs/src/markdown/api.md b/docs/src/markdown/api.md index 53a56b57..e7c254ce 100644 --- a/docs/src/markdown/api.md +++ b/docs/src/markdown/api.md @@ -1,39 +1,12 @@ # API -## `soupsieve.HTML5` +Soup Sieve will detect the document type being used from the Beautiful Soup object that is given to it. For all HTML document types, it will treat tag names and attribute names without case sensitivity like most browsers do (even with XHTML). For HTML5, XHTML and XML, it will consider namespaces per the document's support (provided by the parser). To get namespaces support in HTML5, it is recommended to use `html5lib` as the parser. Some additional configuration is required when using namespaces, see [Namespace](#namespaces) for more information. -`HTML5` is a flag that instructs Soup Sieve to use HTML5 logic. When the `HTML5` flag is used, Soup Sieve will take into account namespaces for known embedded HTML5 namespaces such as SVG. `HTML5` will also not compare tag names and attribute names with case sensitivity. +While attribute values are always generally treated as case sensitive, HTML5, XHTML, and HTML treat the `type` attribute special. The `type` attribute's value is always case insensitive. This is generally how most browsers treat `type`. If you need `type` to be sensitive, you can use the `s` flag: `#!css [type="submit" s]`. -!!! tip - While attribute values are always treated as case sensitive, HTML5, XHTML, and HTML treat the `type` attribute special, `type`'s value is always case insensitive. This is generally how most browsers treat `type`. +## Flags - If you need `type` to be sensitive, you can use the `s` flag: `#!css [type="submit" s]`. - -Keep in mind, that Soup Sieve itself is not responsible for deciding what tag has or does not have a namespace. This is actually determined by the parser used in Beautiful Soup. This flag only tells Soup Sieve that the parser should be calculating namespaces, so it is okay to look at them. The user is responsible for using an appropriate parser for HTML5. If using the [lxml][lxml] or [html5lib][html5lib] with Beautiful Soup, HTML5 namespaces *should* be accounted for in the parsing. If you are using Python's builtin HTML parser, this may not be the case. - -## `soupsieve.HTML` - -`HTML` is a flag that instructs Soup Sieve to use pre HTML5 logic. When the `HTML` flag is used, Soup Sieve will not consider namespaces when evaluating elements. `HTML` will also not compare tag names and attribute names with case sensitivity. - -!!! tip - While attribute values are always treated as case sensitive, HTML5, XHTML, and HTML treat the `type` attribute special, `type`'s value is always case insensitive. This is generally how most browsers treat `type`. - - If you need `type` to be sensitive, you can use the `s` flag: `#!css [type="submit" s]`. - -## `soupsieve.XML` - -`XML` is a flag that instructs Soup Sieve to use XML logic. `XML` will cause Soup Sieve to take namespaces into considerations, and it will evaluate tag names and attribute names with case sensitivity. It will also relax what it considers valid tag name and attribute characters. It will also disable `.class` and `#id` selectors this is more an HTML concept. - -## `soupsieve.XHTML` - -`XHTML` is a flag that instructs Soup Sieve to use XHTML logic. This will cause Soup Sieve to take namespaces into considerations, and evaluate tag names and attributes names with no case sensitivity as this is how most browsers deal with XHTML tags. `.class` and `#id` are perfectly valid in XHTML. - -!!! tip - While attribute values are always treated as case sensitive, HTML5, XHTML, and HTML treat the `type` attribute special, `type`'s value is always case insensitive. This is generally how most browsers treat `type`. - - If you need `type` to be sensitive, you can use the `s` flag: `#!css [type="submit" s]`. - -It is recommend to use the `xml` mode in Beautiful Soup when parsing XHTML documents. +There are no flags at this time, but the parameter is provided for potential future use. ## `soupsieve.select()` @@ -44,7 +17,7 @@ def select(select, node, namespaces=None, limit=0, flags=0): `select` given a tag, will select all tags that match the provided CSS selector string. You can give `limit` a positive integer to return a specific number tags (0 means to return all tags). -`select` accepts a CSS selector string, a `node` or element, an optional [namespace](#namespaces) dictionary, a `limit`, and `flags`. If no flags are specified, HTML5 mode will be assumed. +`select` accepts a CSS selector string, a `node` or element, an optional [namespace](#namespaces) dictionary, a `limit`, and `flags`. ```pycon3 >>> import soupsieve as sv @@ -64,13 +37,13 @@ def iselect(select, node, namespaces=None, limit=0, flags=0): ## `soupsieve.match()` ```py3 -def match(select, node, namespaces=None, mode=0): +def match(select, node, namespaces=None, flags=0): """Match node.""" ``` `match` matches a given node/element with a given CSS selector. -`match` accepts a CSS selector string, a `node` or element, an optional [namespace](#namespaces) dictionary, and flags. If no flags are specified, HTML5 mode will be assumed. +`match` accepts a CSS selector string, a `node` or element, an optional [namespace](#namespaces) dictionary, and flags. ```pycon3 >>> nodes = sv.select('p:is(.a, .b, .c)', soup) @@ -89,7 +62,7 @@ def filter(select, nodes, namespaces=None, flags=0): `filter` takes an iterable containing HTML nodes and will filter them based on the provided CSS selector string. If given a Beautiful Soup tag, it will iterate the children that are tags. -`filter` accepts a CSS selector string, an iterable containing tags, an optional [namespace](#namespaces) dictionary, and flags. If no flags are specified, HTML5 mode will be assumed. +`filter` accepts a CSS selector string, an iterable containing tags, an optional [namespace](#namespaces) dictionary, and flags. ```pycon3 >>> sv.filter('p:not(.b)', soup.div) @@ -105,7 +78,7 @@ def comments(node, limit=0, flags=0): `comments` if useful to extract all comments from a document or document tag. It will extract from the given tag down through all of its children. You can limit how many comments are returned with `limit`. -`comments` accepts a `node` or element, a `limit`, and a flags. If no flags are specified, HTML5 mode will be assumed. +`comments` accepts a `node` or element, a `limit`, and flags. ## `soupsieve.icomments()` @@ -173,3 +146,7 @@ namespace = { ``` Tags do not necessarily have to have a prefix for Soup Sieve to recognize them. For instance, in HTML5, SVG *should* automatically get the SVG namespace. Depending how namespaces were defined in the documentation, tags may inherit namespaces in some conditions. Namespace assignment is mainly handled by the parser and exposed through the Beautiful Soup API. Soup Sieve uses the Beautiful Soup API to then compare namespaces when the appropriate document that supports namespaces is set. + +--8<-- +refs.txt +--8<-- diff --git a/docs/src/markdown/selectors.md b/docs/src/markdown/selectors.md index 9b98fdca..39febb07 100644 --- a/docs/src/markdown/selectors.md +++ b/docs/src/markdown/selectors.md @@ -54,9 +54,7 @@ Selector | Example | Descript `:empty` | `#!css p:empty` | Selects every `#!html

` element that has no children and either no text. Whitespace and comments are ignored. !!! warning "Experimental Selectors" - `:has()` implementation is experimental and may change. There are currently no reference implementation available in any browsers, not to mention the CSS4 specifications have not been finalized, so current implementation is based on our best interpretation. - - Recent addition of `:nth-*`, `:first-*`, `:last-*`, and `:only-*` is experimental. It has been implemented to the best of our understanding, especially `of S` support. Any issues with should be reported. + `:has()` and `of S` support (in `:nth-child(an+b [of S]?)`) is experimental and may change. There are currently no reference implementations available in any browsers, not to mention the CSS4 specifications have not been finalized, so current implementation is based on our best interpretation. Any issues should be reported. ## Custom Selectors @@ -67,3 +65,7 @@ Just because we include selectors from one source, does not mean we have intenti Selector | Example | Description ------------------------------- | ----------------------------------- | ----------- `:contains(text)` | `#!css p:contains(text)` | Select all `#!html

` elements that contain "text" in their content, either directly in themselves or indirectly in their decedents. + +--8<-- +refs.txt +--8<-- diff --git a/soupsieve/__init__.py b/soupsieve/__init__.py index 05cd4ec8..5bfd26bb 100644 --- a/soupsieve/__init__.py +++ b/soupsieve/__init__.py @@ -40,7 +40,7 @@ SoupSieve = cm.SoupSieve -def compile(pattern, namespaces=None, flags=HTML5): # noqa: A001 +def compile(pattern, namespaces=None, flags=0): # noqa: A001 """Compile CSS pattern.""" if namespaces is None: diff --git a/soupsieve/__meta__.py b/soupsieve/__meta__.py index 030d5802..15f43345 100644 --- a/soupsieve/__meta__.py +++ b/soupsieve/__meta__.py @@ -186,5 +186,5 @@ def parse_version(ver, pre=False): return Version(major, minor, micro, release, pre, post, dev) -__version_info__ = Version(1, 0, 0, "beta", 1) +__version_info__ = Version(1, 0, 0, "beta", 2) __version__ = __version_info__._get_canonical() diff --git a/soupsieve/css_match.py b/soupsieve/css_match.py index 11609a46..7dfd0c29 100644 --- a/soupsieve/css_match.py +++ b/soupsieve/css_match.py @@ -5,7 +5,7 @@ from .util import deprecated # Empty tag pattern (whitespace okay) -RE_NOT_EMPTY = re.compile('[^ \t\r\n]') +RE_NOT_EMPTY = re.compile('[^ \t\r\n\f]') # Relationships REL_PARENT = ' ' @@ -19,6 +19,8 @@ REL_HAS_SIBLING = ':~' REL_HAS_CLOSE_SIBLING = ':+' +NS_XHTML = 'http://www.w3.org/1999/xhtml' + class CSSMatch: """Perform CSS matching.""" @@ -29,9 +31,6 @@ def __init__(self, selectors, namespaces, flags): self.selectors = selectors self.namespaces = namespaces self.flags = flags - self.mode = flags & util.MODE_MSK - if self.mode == 0: - self.mode == util.DEFAULT_MODE def get_namespace(self, el): """Get the namespace for the element.""" @@ -45,18 +44,12 @@ def get_namespace(self, el): def supports_namespaces(self): """Check if namespaces are supported in the HTML type.""" - return self.mode in (util.HTML5, util.XHTML, util.XML) - - def is_xml(self): - """Check if document is an XML type.""" - - return self.mode in (util.XHTML, util.XML) + return self.is_xml or self.html_namespace def get_attribute(self, el, attr, prefix): """Get attribute from element if it exists.""" value = None - is_xml = self.is_xml() if self.supports_namespaces(): value = None # If we have not defined namespaces, we can't very well find them, so don't bother trying. @@ -81,7 +74,7 @@ def get_attribute(self, el, attr, prefix): # We can't match our desired prefix attribute as the attribute doesn't have a prefix if prefix and not p and prefix != '*': continue - if is_xml: + if self.is_xml: # The prefix doesn't match if prefix and p and prefix != '*' and prefix != p: continue @@ -140,17 +133,15 @@ def match_attributes(self, el, attributes): if attributes: for a in attributes: value = self.get_attribute(el, a.attribute, a.prefix) + pattern = a.xml_type_pattern if not self.html_namespace and a.xml_type_pattern else a.pattern if isinstance(value, list): value = ' '.join(value) - if a.pattern is None and value is None: - match = False - break - elif a.pattern is not None and value is None: + if value is None: match = False break - elif a.pattern is None: + elif pattern is None: continue - elif value is None or a.pattern.match(value) is None: + elif pattern.match(value) is None: match = False break return match @@ -160,7 +151,7 @@ def match_tagname(self, el, tag): return not ( tag.name and - tag.name not in ((util.lower(el.name) if not self.is_xml() else el.name), '*') + tag.name not in ((util.lower(el.name) if not self.is_xml else el.name), '*') ) def match_tag(self, el, tag): @@ -284,7 +275,7 @@ def match_nth_tag_type(self, el, child): """Match tag type for `nth` matches.""" return( - (child.name == (util.lower(el.name) if not self.is_xml() else el.name)) and + (child.name == (util.lower(el.name) if not self.is_xml else el.name)) and (not self.supports_namespaces() or self.get_namespace(child) == self.get_namespace(el)) ) @@ -295,8 +286,6 @@ def match_nth(self, el, nth): for n in nth: matched = False - if not el.parent: - break if n.selectors and not self.match_selectors(el, n.selectors): break parent = el.parent @@ -390,20 +379,22 @@ def match_nth(self, el, nth): break return matched - def has_child(self, el): - """Check if element has child.""" - - found_child = False - for child in el.children: - if isinstance(child, util.CHILD): - found_child = True - break - return found_child - def match_empty(self, el, empty): """Check if element is empty (if requested).""" - return not empty or (RE_NOT_EMPTY.search(el.text) is None and not self.has_child(el)) + is_empty = True + if empty: + for child in el.children: + if isinstance(child, util.TAG): + is_empty = False + break + elif ( + (isinstance(child, util.NAV_STRINGS) and not isinstance(child, util.NON_CONTENT_STRINGS)) and + RE_NOT_EMPTY.search(child) + ): + is_empty = False + break + return is_empty def match_subselectors(self, el, selectors): """Match selectors.""" @@ -417,9 +408,10 @@ def match_subselectors(self, el, selectors): def match_contains(self, el, contains): """Match element if it contains text.""" + types = (util.NAV_STRINGS,) if not self.is_xml else (util.NAV_STRINGS, util.CDATA) match = True for c in contains: - if c not in el.get_text(): + if c not in el.get_text(types=types): match = False break return match @@ -428,7 +420,6 @@ def match_selectors(self, el, selectors): """Check if element matches one of the selectors.""" match = False - is_html = self.mode != util.XML is_not = selectors.is_not for selector in selectors: match = is_not @@ -441,10 +432,10 @@ def match_selectors(self, el, selectors): if not self.match_empty(el, selector.empty): continue # Verify id matches - if is_html and selector.ids and not self.match_id(el, selector.ids): + if selector.ids and not self.match_id(el, selector.ids): continue # Verify classes match - if is_html and selector.classes and not self.match_classes(el, selector.classes): + if selector.classes and not self.match_classes(el, selector.classes): continue # Verify attribute(s) match if not self.match_attributes(el, selector.attributes): @@ -464,10 +455,27 @@ def match_selectors(self, el, selectors): return match + def is_html_ns(self, el): + """Check if in HTML namespace.""" + + ns = getattr(el, 'namespace') if el else None + return ns and ns == NS_XHTML + def match(self, el): """Match.""" - return isinstance(el, util.TAG) and self.match_selectors(el, self.selectors) + doc = el + while doc.parent: + doc = doc.parent + root = None + for child in doc.children: + if isinstance(child, util.TAG): + root = child + break + self.html_namespace = self.is_html_ns(root) + self.is_xml = doc.is_xml and not self.html_namespace + + return isinstance(el, util.TAG) and el.parent and self.match_selectors(el, self.selectors) class SoupSieve(util.Immutable): diff --git a/soupsieve/css_parser.py b/soupsieve/css_parser.py index 8bf9032d..ab49d51c 100644 --- a/soupsieve/css_parser.py +++ b/soupsieve/css_parser.py @@ -8,63 +8,54 @@ # Sub-patterns parts -CSS_ESCAPES = r'(?:\\[a-f0-9]{1,6}[ ]?|\\.)' +WS = r'[ \t\r\n\f]' -NTH = r'(?:[-+])?(?:\d+n?|n)(?:(?<=n)\s*(?:[-+])\s*(?:\d+))?' +CSS_ESCAPES = r'(?:\\[a-f0-9]{{1,6}}{ws}?|\\.)'.format(ws=WS) -VALUE = r'''(?P"(?:\\.|[^\\"]+)*?"|'(?:\\.|[^\\']+)*?'|(?:[^'"\[\] \t\r\n]|{esc})+)'''.format(esc=CSS_ESCAPES) +NTH = r'(?:[-+])?(?:\d+n?|n)(?:(?<=n){ws}*(?:[-+]){ws}*(?:\d+))?'.format(ws=WS) -ATTR = r''' -(?:\s*(?P[~^|*$]?=)\s* # compare -{value} -(?P[ ]+[is])?)?\s*\] # case sensitive -'''.format(value=VALUE) +VALUE = r'''(?P"(?:\\.|[^\\"]+)*?"|'(?:\\.|[^\\']+)*?'|(?:[^'"\[\] \f\t\r\n]|{esc})+)'''.format(esc=CSS_ESCAPES) + +ATTR = r'''(?:{ws}*(?P[~^|*$]?=){ws}*{value}(?P{ws}+[is])?)?{ws}*\]'''.format(ws=WS, value=VALUE) # Selector patterns PAT_ID = r'#(?:[-\w]|{esc})+'.format(esc=CSS_ESCAPES) PAT_CLASS = r'\.(?:[-\w]|{esc})+'.format(esc=CSS_ESCAPES) -PAT_HTML_TAG = r'(?:(?:(?:[-\w]|{esc})+|\*)?\|)?(?:(?:[-\w]|{esc})+|\*)'.format(esc=CSS_ESCAPES) - -PAT_XML_TAG = r'(?:(?:(?:[-\w.]|{esc})+|\*)?\|)?(?:(?:[-\w.]|{esc})+|\*)'.format(esc=CSS_ESCAPES) - -PAT_HTML_ATTR = r'''(?x) -\[\s*(?P(?:(?:(?:[-\w]|{esc})+|\*)?\|)?(?:[-\w]|{esc})+) -{attr} -'''.format(esc=CSS_ESCAPES, attr=ATTR) +PAT_TAG = r'(?:(?:(?:[-\w]|{esc})+|\*)?\|)?(?:(?:[-\w]|{esc})+|\*)'.format(esc=CSS_ESCAPES) -PAT_XML_ATTR = r'''(?x) -\[\s*(?P(?:(?:(?:[-\w]|{esc})+|\*)?\|)?(?:[-\w.]|{esc})+) +PAT_ATTR = r'''(?x) +\[{ws}*(?P(?:(?:(?:[-\w]|{esc})+|\*)?\|)?(?:[-\w]|{esc})+) {attr} -'''.format(esc=CSS_ESCAPES, attr=ATTR) +'''.format(ws=WS, esc=CSS_ESCAPES, attr=ATTR) PAT_PSEUDO_OPEN = r':(?:has|is|matches|not|where)\(' -PAT_PSEUDO_CLOSE = r'\)' +PAT_PSEUDO_CLOSE = r'{ws}*\)'.format(ws=WS) PAT_PSEUDO = r':(?:empty|root|(?:first|last|only)-(?:child|of-type))\b' PAT_PSEUDO_NTH_CHILD = r'''(?x) (?P:nth-(?:last-)?child -\(\s*(?P{nth}|even|odd)\s*(?:\)|(?<=\s)of\s+)) -'''.format(nth=NTH) +\({ws}*(?P{nth}|even|odd){ws}*(?:\)|(?<={ws})of{ws}+)) +'''.format(ws=WS, nth=NTH) PAT_PSEUDO_NTH_TYPE = r'''(?x) (?P:nth-(?:last-)?of-type -\(\s*(?P{nth}|even|odd)\s*\)) -'''.format(nth=NTH) +\({ws}*(?P{nth}|even|odd){ws}*\)) +'''.format(ws=WS, nth=NTH) -PAT_SPLIT = r'\s*?(?P[,+>~]|[ ](?![,+>~]))\s*' +PAT_SPLIT = r'{ws}*?(?P[,+>~]|{ws}(?![,+>~])){ws}*'.format(ws=WS) # Extra selector patterns -PAT_CONTAINS = r':contains\(\s*{value}\s*\)'.format(value=VALUE) +PAT_CONTAINS = r':contains\({ws}*{value}{ws}*\)'.format(ws=WS, value=VALUE) # CSS escape pattern -RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{1,6}[ ]?)|(\\.))', re.I) +RE_CSS_ESC = re.compile(r'(?:(\\[a-f0-9]{{1,6}}{ws}?)|(\\.))'.format(ws=WS), re.I) # Pattern to break up `nth` specifiers -RE_NTH = re.compile(r'(?P[-+])?(?P\d+n?|n)(?:(?<=n)\s*(?P[-+])\s*(?P\d+))?', re.I) +RE_NTH = re.compile(r'(?P[-+])?(?P\d+n?|n)(?:(?<=n){ws}*(?P[-+]){ws}*(?P\d+))?'.format(ws=WS), re.I) SPLIT = ',' REL_HAS_CHILD = ": " @@ -115,24 +106,6 @@ def enabled(self, flags): return True -class HtmlSelectorPattern(SelectorPattern): - """HTML selector pattern.""" - - def enabled(self, flags): - """Enabled.""" - - return (flags & util.MODE_MSK) in (util.HTML, util.HTML5, util.XHTML) - - -class XmlSelectorPattern(SelectorPattern): - """XML selector pattern.""" - - def enabled(self, flags): - """Enabled.""" - - return (flags & util.MODE_MSK) in (util.XML,) - - class _Selector: """ Intermediate selector class. @@ -208,12 +181,10 @@ class CSSParser: ("contains", SelectorPattern(PAT_CONTAINS)), ("pseudo_nth_child", SelectorPattern(PAT_PSEUDO_NTH_CHILD)), ("pseudo_nth_type", SelectorPattern(PAT_PSEUDO_NTH_TYPE)), - ("id", HtmlSelectorPattern(PAT_ID)), - ("class", HtmlSelectorPattern(PAT_CLASS)), - ("html_tag", HtmlSelectorPattern(PAT_HTML_TAG)), - ("xml_tag", XmlSelectorPattern(PAT_XML_TAG)), - ("html_attribute", HtmlSelectorPattern(PAT_HTML_ATTR)), - ("xml_attribute", XmlSelectorPattern(PAT_XML_ATTR)), + ("id", SelectorPattern(PAT_ID)), + ("class", SelectorPattern(PAT_CLASS)), + ("tag", SelectorPattern(PAT_TAG)), + ("attribute", SelectorPattern(PAT_ATTR)), ("pseudo_close", SelectorPattern(PAT_PSEUDO_CLOSE)), ("combine", SelectorPattern(PAT_SPLIT)) ] @@ -224,13 +195,11 @@ def __init__(self, selector, flags=0): self.pattern = selector self.flags = flags - mode = flags & util.MODE_MSK - - if mode in (util.HTML, util.HTML5, util.XML, util.XHTML, 0): - self.mode = mode if mode else util.DEFAULT_MODE - else: - raise ValueError("Invalid SelectorMatcher flag(s) '{}'".format(mode)) - self.adjusted_flags = flags | self.mode + dflags = self.flags & util.DEPRECATED_FLAGS + if dflags: + util.warn_deprecated( + "The following flags are deprecated and may be repurposed in the future '0x%02X'" % dflags + ) def parse_attribute_selector(self, sel, m, has_selector): """Create attribute selector from the returned regex match.""" @@ -238,6 +207,8 @@ def parse_attribute_selector(self, sel, m, has_selector): case = util.lower(m.group('case').strip()) if m.group('case') else None parts = [css_unescape(a.strip()) for a in m.group('ns_attr').split('|')] ns = '' + is_type = False + pattern2 = None if len(parts) > 1: ns = parts[0] attr = parts[1] @@ -245,10 +216,12 @@ def parse_attribute_selector(self, sel, m, has_selector): attr = parts[0] if case: flags = re.I if case == 'i' else 0 - elif self.mode == util.XML: - flags = 0 + elif util.lower(attr) == 'type': + flags = re.I + is_type = True else: - flags = re.I if util.lower(attr) == 'type' and not ns else 0 + flags = 0 + op = m.group('cmp') if op: value = css_unescape( @@ -277,8 +250,10 @@ def parse_attribute_selector(self, sel, m, has_selector): else: # Value matches pattern = re.compile(r'^%s$' % re.escape(value), flags) + if is_type: + pattern2 = re.compile(pattern.pattern) has_selector = True - sel.attributes.append(ct.SelectorAttribute(attr, ns, pattern)) + sel.attributes.append(ct.SelectorAttribute(attr, ns, pattern, pattern2)) return has_selector def parse_tag_pattern(self, sel, m, has_selector): @@ -437,7 +412,10 @@ def parse_split(self, sel, m, has_selector, selectors, relations, is_pseudo): relations.clear() else: sel.relations.extend(relations) - sel.rel_type = m.group('relation') + rel_type = m.group('relation').strip() + if not rel_type: + rel_type = ' ' + sel.rel_type = rel_type relations.clear() relations.append(sel) sel = _Selector() @@ -512,9 +490,9 @@ def parse_selectors(self, iselector, is_pseudo=False, is_not=False, is_has=False has_selector, sel = self.parse_split(sel, m, has_selector, selectors, relations, is_pseudo) split_last = True continue - elif key in ('html_attribute', 'xml_attribute'): + elif key == 'attribute': has_selector = self.parse_attribute_selector(sel, m, has_selector) - elif key in ('html_tag', 'xml_tag'): + elif key == 'tag': if has_selector: raise SyntaxError("Tag must come first") has_selector = self.parse_tag_pattern(sel, m, has_selector) @@ -555,7 +533,7 @@ def selector_iter(self, pattern): while index <= end: m = None for k, v in self.css_tokens.items(): - if not v.enabled(self.adjusted_flags): + if not v.enabled(self.flags): # pragma: no cover continue m = v.pattern.match(pattern, index) if m: diff --git a/soupsieve/css_types.py b/soupsieve/css_types.py index 14514b4a..2dd7a5ec 100644 --- a/soupsieve/css_types.py +++ b/soupsieve/css_types.py @@ -67,15 +67,16 @@ def __init__(self, name, prefix): class SelectorAttribute(util.Immutable): """Selector attribute rule.""" - __slots__ = ("attribute", "prefix", "pattern", "_hash") + __slots__ = ("attribute", "prefix", "pattern", "xml_type_pattern", "_hash") - def __init__(self, attribute, prefix, pattern): + def __init__(self, attribute, prefix, pattern, xml_type_pattern): """Initialize.""" super().__init__( attribute=attribute, prefix=prefix, - pattern=pattern + pattern=pattern, + xml_type_pattern=xml_type_pattern ) diff --git a/soupsieve/util.py b/soupsieve/util.py index f2a82847..3346346d 100644 --- a/soupsieve/util.py +++ b/soupsieve/util.py @@ -9,13 +9,15 @@ HTML = 0x2 XHTML = 0x4 XML = 0x8 - -MODE_MSK = 0xF -DEFAULT_MODE = HTML5 +DEPRECATED_FLAGS = HTML5 | HTML | XHTML | XML TAG = bs4.Tag -CHILD = (TAG, bs4.Doctype, bs4.Declaration, bs4.CData, bs4.ProcessingInstruction) COMMENT = bs4.Comment +DECLARATION = bs4.Declaration +CDATA = bs4.CData +PROC_INSTRUCT = bs4.ProcessingInstruction +NAV_STRINGS = bs4.NavigableString +NON_CONTENT_STRINGS = (COMMENT, DECLARATION, CDATA, PROC_INSTRUCT) LC_A = ord('a') LC_Z = ord('z') @@ -90,6 +92,15 @@ def __setattr__(self, name, value): raise AttributeError("'{}' is immutable".format(self.__class__.__name__)) + def __repr__(self): # pragma: no cover + """Representation.""" + + return "{}({})".format( + self.__base__(), ', '.join(["{}={!r}".format(k, getattr(self, k)) for k in self.__slots__[:-1]]) + ) + + __str__ = __repr__ + class ImmutableDict(Mapping): """Hashable, immutable dictionary.""" @@ -153,3 +164,13 @@ def _func(*args, **kwargs): return func(*args, **kwargs) return _func return _decorator + + +def warn_deprecated(message, stacklevel=2): + """Warn deprecated.""" + + warnings.warn( + message, + category=DeprecationWarning, + stacklevel=stacklevel + ) diff --git a/tests/test_extra.py b/tests/test_extra.py index 8b215989..9bbf3aae 100644 --- a/tests/test_extra.py +++ b/tests/test_extra.py @@ -8,7 +8,6 @@ ``` """ from . import util -import soupsieve as sv class TestLevel1(util.TestCase): @@ -29,33 +28,69 @@ def test_contains(self): markup, 'body span:contains(that)', ['2'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, 'body span:contains(" that ")', ['2'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, 'body :contains(" that ")', ['1', '2'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, 'body :contains( "Testing" )', ['1'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, 'body :contains(bad)', [], - flags=sv.HTML5 + flags=util.HTML5 + ) + + def test_contains_escapes(self): + """Test tag.""" + + markup = """ +

Testing + thatcontains works.
+ """ + + self.assert_selector( + markup, + 'body span:contains("\nthat")', + ['2'], + flags=util.HTML5 + ) + + def test_contains_cdata(self): + """Test tag.""" + + markup = """ +
Testing that contains works.
+ """ + + self.assert_selector( + markup, + 'body *:contains("that")', + ['1'], + flags=util.HTML5 + ) + + self.assert_selector( + markup, + '*:contains("that")', + ['1', '2'], + flags=util.XML ) diff --git a/tests/test_level1.py b/tests/test_level1.py index 86fbbfea..7eb24dd2 100644 --- a/tests/test_level1.py +++ b/tests/test_level1.py @@ -17,7 +17,6 @@ - `:active`: No elements in our environment can be "active", so this makes no sense in our context. """ from . import util -import soupsieve as sv class TestLevel1(util.TestCase): @@ -35,7 +34,7 @@ def test_tag(self): """, "span", ["1"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_tags(self): @@ -51,7 +50,7 @@ def test_tags(self): """, "span, a", ["1", "2"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_child(self): @@ -67,7 +66,7 @@ def test_child(self): """, "div span", ["1"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_id(self): @@ -85,14 +84,14 @@ def test_id(self): markup, "#1", ["1"], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "a#2", ["2"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_class(self): @@ -110,21 +109,21 @@ def test_class(self): markup, ".foo", ["1"], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "a.bar", ["2"], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, ".foo", ["1"], - flags=sv.XHTML + flags=util.XHTML ) def test_classes(self): @@ -144,7 +143,7 @@ def test_classes(self): markup, "a.foo.bar", ["4"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_escapes(self): @@ -162,5 +161,5 @@ def test_escapes(self): markup, ".foo\\:bar\\3a foobar", ["1"], - flags=sv.HTML5 + flags=util.HTML5 ) diff --git a/tests/test_level2.py b/tests/test_level2.py index 22e141cb..eedd9281 100644 --- a/tests/test_level2.py +++ b/tests/test_level2.py @@ -23,7 +23,6 @@ - `:focus`: Items cannot be focused in our environment, so this has little meaning and will not be implemented. """ from . import util -import soupsieve as sv class TestLevel2(util.TestCase): @@ -50,7 +49,7 @@ def test_direct_child(self): markup, "div > span", ["3"], - flags=sv.HTML5 + flags=util.HTML5 ) # No spaces @@ -58,7 +57,7 @@ def test_direct_child(self): markup, "div>span", ["3"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_direct_sibling(self): @@ -82,7 +81,7 @@ def test_direct_sibling(self): markup, "span + span", ["5", "6"], - flags=sv.HTML5 + flags=util.HTML5 ) # No spaces @@ -90,7 +89,7 @@ def test_direct_sibling(self): markup, "span+span", ["5", "6"], - flags=sv.HTML5 + flags=util.HTML5 ) # Complex @@ -98,7 +97,7 @@ def test_direct_sibling(self): markup, "span#4 + span#5", ["5"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_wild_tag(self): @@ -119,7 +118,7 @@ def test_wild_tag(self): """, "body *", ["0", "1", "2", "3", "4", "5", "6", "div", "pre"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_attribute(self): @@ -142,7 +141,7 @@ def test_attribute(self): markup, "[href]", ["2"], - flags=sv.HTML5 + flags=util.HTML5 ) # With spaces @@ -150,7 +149,7 @@ def test_attribute(self): markup, "[ href ]", ["2"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_multi_attribute(self): @@ -172,7 +171,7 @@ def test_multi_attribute(self): """, "span[id].test[data-test=test]", ["5"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_attribute_equal(self): @@ -196,7 +195,7 @@ def test_attribute_equal(self): markup, '[id=5]', ["5"], - flags=sv.HTML5 + flags=util.HTML5 ) # Single quoted @@ -204,7 +203,7 @@ def test_attribute_equal(self): markup, "[id='5']", ["5"], - flags=sv.HTML5 + flags=util.HTML5 ) # Double quoted @@ -212,7 +211,7 @@ def test_attribute_equal(self): markup, '[id="5"]', ["5"], - flags=sv.HTML5 + flags=util.HTML5 ) # With spaces @@ -220,35 +219,35 @@ def test_attribute_equal(self): markup, '[ id = "5" ]', ["5"], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, '[ID="5"]', ["5"], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, '[ id = "5" ]', ["5"], - flags=sv.HTML + flags=util.HTML ) self.assert_selector( markup, '[ID="5"]', ["5"], - flags=sv.HTML + flags=util.HTML ) self.assert_selector( '', '[ id = "5" ]', [], - flags=sv.HTML + flags=util.HTML ) def test_attribute_type(self): @@ -275,14 +274,14 @@ def test_attribute_type(self): markup, '[type="test"]', ["0", '2'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, '[type="test"]', ['2'], - flags=sv.XML + flags=util.XML ) def test_attribute_start_dash(self): @@ -303,7 +302,7 @@ def test_attribute_start_dash(self): """, "[lang|=en]", ["0"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_attribute_contains_space(self): @@ -327,7 +326,7 @@ def test_attribute_contains_space(self): markup, "[class~=test2]", ["0"], - flags=sv.HTML5 + flags=util.HTML5 ) # Start of list @@ -335,7 +334,7 @@ def test_attribute_contains_space(self): markup, "[class~=test-a]", ["pre"], - flags=sv.HTML5 + flags=util.HTML5 ) # End of list @@ -343,7 +342,7 @@ def test_attribute_contains_space(self): markup, "[class~=test-b]", ["pre"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_first_child(self): @@ -364,5 +363,5 @@ def test_first_child(self): """, "span:first-child", ["1", "4"], - flags=sv.HTML5 + flags=util.HTML5 ) diff --git a/tests/test_level3.py b/tests/test_level3.py index 76f4184e..9d720095 100644 --- a/tests/test_level3.py +++ b/tests/test_level3.py @@ -33,7 +33,6 @@ Is this even useful in the context of how Soup Sieve would be used? """ from . import util -import soupsieve as sv class TestLevel3(util.TestCase): @@ -57,7 +56,7 @@ def test_distant_sibling(self): """, "p ~ span", ["3"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_not(self): @@ -80,21 +79,21 @@ def test_not(self): markup, 'div :not([id="1"])', ["0", "2", "3", "4", "5", "6", "pre"], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, 'div :NOT([id="1"])', ["0", "2", "3", "4", "5", "6", "pre"], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, 'span:not([id="1"])', ["3", "4", "5", "6"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_attribute_begins(self): @@ -115,7 +114,7 @@ def test_attribute_begins(self): """, "[class^=here]", ["0"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_attribute_end(self): @@ -136,7 +135,7 @@ def test_attribute_end(self): """, "[class$=words]", ["0"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_attribute_contains(self): @@ -159,7 +158,7 @@ def test_attribute_contains(self): markup, "[class*=words]", ["0", "3", "pre"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_root(self): @@ -189,14 +188,14 @@ def test_root(self): markup, ":root", ["root"], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, ":root > body > div", ["div"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_empty(self): @@ -220,7 +219,7 @@ def test_empty(self): markup, "body :empty", ["4", "5", "6", "8"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_last_child(self): @@ -243,14 +242,14 @@ def test_last_child(self): markup, "span:last-child", ["1", "6"], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "span:LAST-CHILD", ["1", "6"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_only_child(self): @@ -271,7 +270,7 @@ def test_only_child(self): """, "span:only-child", ["1"], - flags=sv.HTML5 + flags=util.HTML5 ) def test_namespace(self): @@ -313,7 +312,7 @@ def test_namespace(self): "foo": "http://me.com/namespaces/foofoo", "bar": "http://me.com/namespaces/foobar" }, - flags=sv.XML + flags=util.XML ) self.assert_selector( @@ -324,7 +323,7 @@ def test_namespace(self): "foo": "http://me.com/namespaces/foofoo", "bar": "http://me.com/namespaces/foobar" }, - flags=sv.XML + flags=util.XML ) self.assert_selector( @@ -335,7 +334,7 @@ def test_namespace(self): "foo": "http://me.com/namespaces/foofoo", "bar": "http://me.com/namespaces/foobar" }, - flags=sv.XML + flags=util.XML ) self.assert_selector( @@ -346,7 +345,7 @@ def test_namespace(self): "foo": "http://me.com/namespaces/foofoo", "bar": "http://me.com/namespaces/foobar" }, - flags=sv.XML + flags=util.XML ) # Because we employ level 4 selectors @@ -359,7 +358,7 @@ def test_namespace(self): "foo": "http://me.com/namespaces/foofoo", "bar": "http://me.com/namespaces/foobar" }, - flags=sv.XML + flags=util.XML ) # Now that we apply a default namespace. Null space. @@ -372,7 +371,7 @@ def test_namespace(self): "foo": "http://me.com/namespaces/foofoo", "bar": "http://me.com/namespaces/foobar" }, - flags=sv.XML + flags=util.XML ) self.assert_selector( @@ -384,7 +383,7 @@ def test_namespace(self): "foo": "http://me.com/namespaces/foofoo", "bar": "http://me.com/namespaces/foobar" }, - flags=sv.XML + flags=util.XML ) # Because no prefix is specified for "other" in the above document, @@ -400,7 +399,7 @@ def test_namespace(self): "bar": "http://me.com/namespaces/foobar", "other": "http://me.com/namespaces/other" }, - flags=sv.XML + flags=util.XML ) def test_attribute_namespace(self): @@ -441,7 +440,7 @@ def test_attribute_namespace(self): '[xlink|href*=forw],[xlink|href="images/sprites.svg#icon-redo"]', ['1', '2'], namespaces={"xlink": "http://www.w3.org/1999/xlink"}, - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( @@ -449,7 +448,7 @@ def test_attribute_namespace(self): '[bad|href*=forw]', [], namespaces={"xlink": "http://www.w3.org/1999/xlink"}, - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( @@ -457,7 +456,7 @@ def test_attribute_namespace(self): '[\\:href]', ['4'], namespaces={"xlink": "http://www.w3.org/1999/xlink"}, - flags=sv.HTML5 + flags=util.HTML5 ) def test_attribute_namespace_xhtml(self): @@ -499,7 +498,7 @@ def test_attribute_namespace_xhtml(self): '[xlink|href*=forw],[xlink|href="images/sprites.svg#icon-redo"]', ['1', '2'], namespaces={"xlink": "http://www.w3.org/1999/xlink"}, - flags=sv.XHTML + flags=util.XHTML ) def test_first_of_type(self): @@ -524,21 +523,21 @@ def test_first_of_type(self): markup, "p:first-of-type", ['0'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "span:first-of-type", ['2'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "body :first-of-type", ['0', '2'], - flags=sv.HTML5 + flags=util.HTML5 ) def test_last_of_type(self): @@ -563,21 +562,21 @@ def test_last_of_type(self): markup, "p:last-of-type", ['10'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "span:last-of-type", ['11'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "body :last-of-type", ['10', '11'], - flags=sv.HTML5 + flags=util.HTML5 ) def test_only_of_type(self): @@ -602,7 +601,7 @@ def test_only_of_type(self): markup, "p:only-of-type", ['1', '4'], - flags=sv.HTML5 + flags=util.HTML5 ) def test_nth_child(self): @@ -657,63 +656,63 @@ def test_nth_child(self): markup, "p:nth-child(-2)", [], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "p:nth-child(2)", ['1'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "p:nth-child(9n - 1)", ['7'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "p:nth-child(2n + 1)", ['0', '8', '10'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "p:nth-child(-n+3)", ['0', '1'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "span:nth-child(-n+3)", ['2'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "body *:nth-child(-n+3)", ['0', '1', '2'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "p:nth-child(odd)", ['0', '8', '10'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "p:nth-child(even)", ['1', '7', '9'], - flags=sv.HTML5 + flags=util.HTML5 ) def test_nth_last_child(self): @@ -738,14 +737,14 @@ def test_nth_last_child(self): markup, "p:nth-last-child(2)", ['10'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "p:nth-last-child(2n + 1)", ['1', '7', '9'], - flags=sv.HTML5 + flags=util.HTML5 ) def test_nth_of_type(self): @@ -770,28 +769,28 @@ def test_nth_of_type(self): markup, "p:nth-of-type(3)", ['7'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "p:nth-of-type(2n + 1)", ['0', '7', '9'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "span:nth-of-type(2n + 1)", ['2', '4', '6'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "body :nth-of-type(2n + 1)", ['0', '2', '4', '6', '7', '9'], - flags=sv.HTML5 + flags=util.HTML5 ) def test_nth_last_of_type(self): @@ -816,12 +815,12 @@ def test_nth_last_of_type(self): markup, "p:nth-last-of-type(3)", ['8'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "p:nth-last-of-type(2n + 1)", ['1', '8', '10'], - flags=sv.HTML5 + flags=util.HTML5 ) diff --git a/tests/test_level4.py b/tests/test_level4.py index 1271c3bb..acc43d66 100644 --- a/tests/test_level4.py +++ b/tests/test_level4.py @@ -95,14 +95,14 @@ def test_attribute_case(self): markup, "[class*=WORDS]", [], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, "[class*=WORDS i]", ["0", "3", "pre"], - flags=sv.HTML5 + flags=util.HTML5 ) with self.assertRaises(SyntaxError): @@ -132,14 +132,14 @@ def test_attribute_type_case(self): markup, '[type="test" s]', ['2'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, '[type="test" i]', ['0', '2'], - flags=sv.XML + flags=util.XML ) def test_is_matches_where(self): @@ -157,21 +157,21 @@ def test_is_matches_where(self): markup, ":is(span, a)", ["1", "2"], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, ":is(span, a:matches(#2))", ["1", "2"], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, ":where(span, a:matches(#2))", ["1", "2"], - flags=sv.HTML5 + flags=util.HTML5 ) # Each pseudo class is evaluated separately @@ -180,7 +180,7 @@ def test_is_matches_where(self): markup, ":is(span):not(span)", [], - flags=sv.HTML5 + flags=util.HTML5 ) # Each pseudo class is evaluated separately @@ -189,7 +189,7 @@ def test_is_matches_where(self): markup, ":is(span):is(div)", [], - flags=sv.HTML5 + flags=util.HTML5 ) # Each pseudo class is evaluated separately @@ -198,7 +198,7 @@ def test_is_matches_where(self): markup, ":is(a):is(#2)", ['2'], - flags=sv.HTML5 + flags=util.HTML5 ) def test_multi_nested_not(self): @@ -221,7 +221,7 @@ def test_multi_nested_not(self): markup, 'div :not(p, :not([id=5]))', ['5'], - flags=sv.HTML5 + flags=util.HTML5 ) def test_has(self): @@ -270,49 +270,49 @@ def test_has(self): markup, 'div:not(.aaaa):has(.kkkk > p.llll)', ['4', '5', '6'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, 'div:NOT(.aaaa):HAS(.kkkk > p.llll)', ['4', '5', '6'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, 'p:has(+ .dddd:has(+ div .jjjj))', ['2'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, 'p:has(~ .jjjj)', ['7', '8'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup2, 'div:has(> .bbbb, .ffff, .jjjj)', ['0', '4', '8'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup2, 'div:has(> :not(.bbbb, .ffff, .jjjj))', ['2', '6', '8'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup2, 'div:not(:has(> .bbbb, .ffff, .jjjj))', ['2', '6'], - flags=sv.HTML5 + flags=util.HTML5 ) def test_nth_child_of_s(self): @@ -337,12 +337,12 @@ def test_nth_child_of_s(self): markup, ":nth-child(2n + 1 of :is(p, span).test)", ['2', '6', '10'], - flags=sv.HTML5 + flags=util.HTML5 ) self.assert_selector( markup, ":nth-child(-n+3 of p)", ['0', '1', '7'], - flags=sv.HTML5 + flags=util.HTML5 ) diff --git a/tests/test_soupsieve.py b/tests/test_soupsieve.py index 75e03a5d..d5fca031 100644 --- a/tests/test_soupsieve.py +++ b/tests/test_soupsieve.py @@ -33,10 +33,10 @@ def test_comments(self): """ soup = bs4.BeautifulSoup(markup, 'html5lib') - comments = [str(c).strip() for c in sv.comments(soup, flags=sv.HTML5)] + comments = [str(c).strip() for c in sv.comments(soup)] self.assertEqual(sorted(comments), sorted(['before header', 'comment', "don't ignore"])) - comments = [str(c).strip() for c in sv.icomments(soup, limit=2, flags=sv.HTML5)] + comments = [str(c).strip() for c in sv.icomments(soup, limit=2)] self.assertEqual(sorted(comments), sorted(['before header', 'comment'])) def test_select(self): @@ -137,17 +137,17 @@ def test_copy_pickle(self): """Test copy and pickle.""" # Test that we can pickle and unpickle - p1 = sv.compile('p[id]', flags=sv.HTML5) + p1 = sv.compile('p[id]') sp1 = pickle.dumps(p1) pp1 = pickle.loads(sp1) self.assertTrue(pp1 == p1) # Test that we pull the same one from cache - p2 = sv.compile('p[id]', flags=sv.HTML5) + p2 = sv.compile('p[id]') self.assertTrue(p1 is p2) # Test that we compile a new one when providing a different flags - p3 = sv.compile('p[id]', flags=sv.HTML) + p3 = sv.compile('p[id]', flags=0x10) self.assertTrue(p1 is not p3) self.assertTrue(p1 != p3) @@ -183,7 +183,7 @@ def test_recompile(self): self.assertTrue(p1 is p2) with pytest.raises(ValueError): - sv.compile(p1, flags=sv.HTML) + sv.compile(p1, flags=0x10) with pytest.raises(ValueError): sv.compile(p1, namespaces={"": ""}) @@ -218,6 +218,41 @@ def test_immutable_dict(self): class TestDeprcations(unittest.TestCase): """Test Soup Sieve deprecations.""" + def test_flag_deprecations(self): + """Test flag deprecation.""" + + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter("always") + + sv.compile('p', flags=sv.HTML) + self.assertTrue(len(w) == 1) + self.assertTrue(issubclass(w[-1].category, DeprecationWarning)) + + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter("always") + + sv.compile('p', flags=sv.XHTML) + self.assertTrue(len(w) == 1) + self.assertTrue(issubclass(w[-1].category, DeprecationWarning)) + + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter("always") + + sv.compile('p', flags=sv.XML) + self.assertTrue(len(w) == 1) + self.assertTrue(issubclass(w[-1].category, DeprecationWarning)) + + with warnings.catch_warnings(record=True) as w: + # Cause all warnings to always be triggered. + warnings.simplefilter("always") + + sv.compile('p', flags=sv.HTML5) + self.assertTrue(len(w) == 1) + self.assertTrue(issubclass(w[-1].category, DeprecationWarning)) + def test_selectiter_deprecation(self): """Test the deprecated iterator functions.""" @@ -306,12 +341,6 @@ def test_commentsiter_deprecation(self): class TestInvalid(unittest.TestCase): """Test invalid.""" - def test_invalid_mode(self): - """Test invalid mode.""" - - with self.assertRaises(ValueError): - sv.compile('p', None, sv.util.HTML | sv.util.HTML5) - def test_invalid_combination(self): """ Test invalid combination. diff --git a/tests/util.py b/tests/util.py index 14b60115..49937485 100644 --- a/tests/util.py +++ b/tests/util.py @@ -4,6 +4,11 @@ import textwrap import soupsieve as sv +HTML5 = 1 +HTML = 2 +XHTML = 4 +XML = 8 + class TestCase(unittest.TestCase): """Test case.""" @@ -11,12 +16,12 @@ class TestCase(unittest.TestCase): def assert_selector(self, markup, selectors, expected_ids, namespaces={}, flags=0): """Assert selector.""" - mode = flags & sv.util.MODE_MSK - if mode == sv.HTML: + mode = flags & 0x0F + if mode == HTML: bs_mode = 'lxml' - elif mode in (sv.HTML5, 0): + elif mode in (HTML5, 0): bs_mode = 'html5lib' - elif mode in (sv.XHTML, sv.XML): + elif mode in (XHTML, XML): bs_mode = 'xml' soup = bs4.BeautifulSoup(textwrap.dedent(markup.replace('\r\n', '\n')), bs_mode)