Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.
Download ZIP
Browse files

Ignore HTML5 start tags 'footer', 'nav' and 'aside' too

  • Loading branch information...
commit d26ff98ffcbcd71a0b79ae5305d4b96181fc70ed 1 parent b515473
@ercpe ercpe authored
Showing with 1 addition and 1 deletion.
  1. +1 −1  webarticle2text.py
View
2  webarticle2text.py
@@ -92,7 +92,7 @@ def __init__(self):
def handle_starttag(self, tag, attrs):
ignore0 = self._ignore
tag = tag.lower()
- if tag in ('script','style','option','ul','li','legend','object','noscript','label'): # 'h1','h2','h3','h4','h5','h6',
+ if tag in ('script','style','option','ul','li','legend','object','noscript','label', 'footer', 'nav', 'aside'): # 'h1','h2','h3','h4','h5','h6',
self._ignore = True
attrd = dict(attrs)
self._lasttag = tag.lower()

0 comments on commit d26ff98

Please sign in to comment.
Something went wrong with that request. Please try again.