Skip to content

Commit

Permalink
Small tweaks to the list of block-like elements.
Browse files Browse the repository at this point in the history
  • Loading branch information
coleifer committed Jun 13, 2023
1 parent 7140bf4 commit 48d7a3a
Showing 1 changed file with 8 additions and 5 deletions.
13 changes: 8 additions & 5 deletions micawber/parsers.py
Original file line number Diff line number Diff line change
@@ -25,11 +25,14 @@
standalone_url_re = re.compile('^\s*' + url_pattern + '\s*$')

block_elements = set([
-'address', 'blockquote', 'center', 'dir', 'div', 'dl', 'fieldset', 'form',
-'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'isindex', 'menu', 'noframes',
-'noscript', 'ol', 'p', 'pre', 'table', 'ul', 'dd', 'dt', 'frameset', 'li',
-'tbody', 'td', 'tfoot', 'th', 'thead', 'tr', 'button', 'del', 'iframe',
-'ins', 'map', 'object', 'script', '[document]'
+'address', 'article', 'aside', 'blockquote', 'canvas', 'center', 'dir',
+'dd', 'div', 'dl', 'dt', 'fieldset', 'figcaption', 'figure', 'footer',
+'form', 'frameset', 'h1', 'h2', 'h3', 'h4', 'h5', 'h6', 'header', 'hr',
+'isindex', 'li', 'main', 'menu', 'nav', 'noframes', 'noscript', 'ol', 'p',
+'pre', 'section', 'table', 'tbody', 'td', 'tfoot', 'th', 'thead', 'tr',
+'ul',
+# Additional elements.
+'button', 'del', 'iframe', 'ins', 'map', 'object', 'script', '[document]',
])

skip_elements = set(['a', 'pre', 'code', 'input', 'textarea', 'select'])
Expand Down

0 comments on commit 48d7a3a

Please sign in to comment.