Permalink
Browse files

Ensure HTML is not exposed in the description member

  • Loading branch information...
adamvoss committed Apr 16, 2017
1 parent 92fc152 commit c79bf289bc498c416b1eb8c9f6aa7710a867dc08
View
@@ -5,3 +5,8 @@ The podcast parser project is a library from the gPodder project to provide an
easy and reliable way of parsing RSS- and Atom-based podcast feeds in Python.
* Web: http://gpodder.org/podcastparser/
## Automated Tests
To run the unit tests you need [`nose`](http://nose.readthedocs.io/en/latest/). If you have `nose` installed, use the `nosetests` command in the repository's root directory to run the tests.
View
@@ -674,6 +674,13 @@ def validate_episode(self):
if len(entry['chapters']) == 0:
del entry['chapters']
# Ensures `description` does not contain HTML
if 'description' in entry and is_html(entry['description']):
if 'description_html' not in entry:
entry['description_html'] = entry['description']
entry['description'] = ''
# Sets `description` to the tag-stripped `description_html` when `description` is empty
if 'description_html' in entry and not entry['description']:
entry['description'] = remove_html_tags(entry['description_html'])
@@ -833,6 +840,12 @@ def normalize_feed_url(url):
# urlunsplit might return "a slightly different, but equivalent URL"
return urlparse.urlunsplit((scheme, netloc, path, query, fragment))
def is_html(text):
    """
    Tests whether the given string contains HTML encoded data

    This is a heuristic: the text is considered HTML when it contains a
    ``<`` immediately followed by a letter and, somewhere after that, a
    ``>``. Plain text such as ``"x <y and z> w"`` is therefore reported
    as HTML as well.

    :param text: the string to inspect; ``None`` or an empty string is
        treated as "not HTML"
    :returns: True if something that looks like an HTML tag was found,
        False otherwise
    """
    # Empty or missing text cannot contain markup; returning early also
    # avoids the TypeError re raises when asked to search None.
    if not text:
        return False

    # [\s\S] (instead of '.') lets the match span line breaks, so a tag
    # that is closed on a later line is still detected.
    html_test = re.compile(r'<[a-z][\s\S]*>', re.IGNORECASE)
    return bool(html_test.search(text))
def remove_html_tags(html):
"""
@@ -0,0 +1,22 @@
{
"title": "HTML Podcast",
"episodes": [
{
"title": "Ep 1",
"description": "This is a test",
"description_html": "<h1>This is a <em>test</em></h1>",
"published": 0,
"guid": "http://example.org/example.opus",
"link": "",
"total_time": 0,
"payment_url": null,
"enclosures": [
{
"file_size": -1,
"url": "http://example.org/example.opus",
"mime_type": "application/octet-stream"
}
]
}
]
}
@@ -0,0 +1,12 @@
<rss>
<channel>
<title>HTML Podcast</title>
<item>
<title>Ep 1</title>
<enclosure url="http://example.org/example.opus"/>
<description>
<![CDATA[ <h1>This is a <em>test</em></h1> ]]>
</description>
</item>
</channel>
</rss>
@@ -0,0 +1,22 @@
{
"title": "HTML Podcast with Text Description",
"episodes": [
{
"title": "Ep 1",
"description": "This is also a test",
"description_html": "<h1>This is also a <em>test</em></h1>",
"published": 0,
"guid": "http://example.org/example.opus",
"link": "",
"total_time": 0,
"payment_url": null,
"enclosures": [
{
"file_size": -1,
"url": "http://example.org/example.opus",
"mime_type": "application/octet-stream"
}
]
}
]
}
@@ -0,0 +1,13 @@
<rss>
<channel>
<title>HTML Podcast with Text Description</title>
<item>
<title>Ep 1</title>
<enclosure url="http://example.org/example.opus"/>
<content:encoded xmlns:content="http://purl.org/rss/1.0/modules/content/">
<![CDATA[ <h1>This is also a <em>test</em></h1> ]]>
</content:encoded>
<description><![CDATA[ <h1>This is also a <em>test</em></h1> ]]></description>
</item>
</channel>
</rss>
@@ -0,0 +1,22 @@
{
"title": "HTML Podcast with Text Description",
"episodes": [
{
"title": "Ep 1",
"description": "This is also a test",
"description_html": "<h1>This is also a <em>test</em></h1>",
"published": 0,
"guid": "http://example.org/example.opus",
"link": "",
"total_time": 0,
"payment_url": null,
"enclosures": [
{
"file_size": -1,
"url": "http://example.org/example.opus",
"mime_type": "application/octet-stream"
}
]
}
]
}
@@ -0,0 +1,13 @@
<rss>
<channel>
<title>HTML Podcast with Text Description</title>
<item>
<title>Ep 1</title>
<enclosure url="http://example.org/example.opus"/>
<content:encoded xmlns:content="http://purl.org/rss/1.0/modules/content/">
<![CDATA[ <h1>This is also a <em>test</em></h1> ]]>
</content:encoded>
<description><![CDATA[ <h1>This text will be discarded</h1> ]]></description>
</item>
</channel>
</rss>

0 comments on commit c79bf28

Please sign in to comment.