Permalink
Browse files

Don't show noscript tags, as they generally just pollute the content

  • Loading branch information...
1 parent 51a3976 commit 889849990a04cd4f4f48b5764ae0ec49f196aa36 @dcramer committed Sep 15, 2011
Showing with 323 additions and 2 deletions.
  1. +1 −1 decruft/decruft.py
  2. +4 −1 setup.py
  3. 0 tests/__init__.py
  4. +301 −0 tests/fixtures/disqus.html
  5. +17 −0 tests/tests.py
View
@@ -70,7 +70,7 @@ def summary(self):
while True:
self._html(True)
# XXX: Cleaner should handle this, right?
- [i.drop_tree() for i in self.tags(self.html, 'script', 'style')]
+ [i.drop_tree() for i in self.tags(self.html, 'script', 'style', 'noscript')]
if ruthless: self.remove_unlikely_candidates()
self.transform_misused_divs_into_paragraphs()
View
@@ -1,4 +1,3 @@
-import os
from setuptools import setup, find_packages
setup(name='decruft',
@@ -7,8 +6,12 @@
author='Sharmila.Gopirajan',
url='http://code.google.com/p/decruft/',
packages=find_packages(),
+ test_suite='unittest2.collector',
install_requires=[
'lxml',
],
+ tests_require=[
+ 'unittest2',
+ ],
include_package_data=True,
)
View
No changes.
View
@@ -0,0 +1,301 @@
+
+<!DOCTYPE html>
+<html>
+<head>
+ <link rel="shortcut icon" href="http://mediacdn.disqus.com/1291160803/img/favicon.ico" type="image/vnd.microsoft.icon" />
+ <meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
+ <title>DISQUS Code - Overseer</title>
+
+ <link href="http://mediacdn.disqus.com/1291160803/css/base.css" type="text/css" rel="stylesheet" />
+ <link href="http://mediacdn.disqus.com/1291160803/css/dashboard.css" type="text/css" rel="stylesheet" />
+ <link href="/css/global.css" type="text/css" rel="stylesheet" />
+
+ <script type="text/javascript" charset="utf-8">
+ if (window.context === undefined) {
+ var context = {};
+ }
+
+ document.domain = 'disqus.com';
+
+ context.disqusDomain = 'disqus.com';
+ context.disqusUrl = 'http://' + context.disqusDomain;
+ context.apiUrl = context.disqusUrl + '/api/3.0/';
+ context.mediaUrl = 'http://mediacdn.disqus.com/1291936940';
+ context.apiPublicKey = 'nVV8IwJiiwupZMqyr9Giae6zKMOb0DIhw6ejfTV1rIgZYAP66MvvYrUdbVTobFfJ';
+
+ context.urlMap = {
+ 'features': 'http://disqus.com/features/',
+ 'explore': 'http://disqus.com/explore/',
+ 'signup': 'http://disqus.com/admin/register/',
+ 'dashboard': 'http://disqus.com/dashboard/',
+ 'admin': 'http://disqus.com/admin/',
+ 'logout': 'http://disqus.com/logout/',
+ 'disqus_admin': 'http://disqus.com/disqus-admin/',
+ 'home': 'http://disqus.com'
+ };
+ context.navMap = {
+ 'features': '',
+ 'explore': '',
+ 'signup': '',
+ 'dashboard': ' selected',
+ 'admin': '',
+ 'disqus_admin': ''
+ };
+ // http://stackoverflow.com/questions/260749/what-is-the-best-way-to-get-and-set-a-single-cookie-value-using-javascript
+ function readCookie(name) {
+ var nameEQ = name + "=";
+ var ca = document.cookie.split(';');
+ for(var i=0;i < ca.length;i++) {
+ var c = ca[i];
+ while (c.charAt(0)==' ') c = c.substring(1,c.length);
+ if (c.indexOf(nameEQ) == 0) return c.substring(nameEQ.length,c.length);
+ }
+ return null;
+ }
+
+ var data = readCookie('disqusauth');
+ if (data) {
+ data = data.split('|');
+ if (parseInt(data[0]) >= 1) {
+ data = {
+ 'username': data[1],
+ 'is_staff': parseInt(data[2], 10),
+ 'has_forums': parseInt(data[3]),
+ 'datetime_formatting': parseInt(data[4], 10) ? 'absolute' : 'relative',
+ 'tz_offset': data[5]
+ };
+ } else {
+ data = {
+ 'username': data[0],
+ 'is_staff': parseInt(data[1], 10),
+ 'has_forums': data[2].length > 0,
+ 'datetime_formatting': parseInt(data[3], 10) ? 'absolute' : 'relative',
+ 'tz_offset': data[4]
+ };
+ }
+ }
+ context.auth = data || {};
+ // Disqus Embed settings
+ var disqus_shortname = 'disqus';
+ </script>
+
+ <script src="http://mediacdn.disqus.com/1291160803/js/src/lib/jquery.js" type="text/javascript"></script>
+ <script src="http://mediacdn.disqus.com/1291936940/build/system/sdk.js" type="text/javascript" charset="utf-8"></script>
+ <script src="http://mediacdn.disqus.com/1291936940/js/dist/base.js" type="text/javascript" charset="utf-8"></script>
+ <script src="http://mediacdn.disqus.com/1291936940/js/src/global.js" type="text/javascript" charset="utf-8"></script>
+ <script src="/js/global.js" type="text/javascript" charset="utf-8"></script>
+</head>
+<body>
+ <div id="header">
+ <div id="global-nav">
+ <div class="container">
+ <ul>
+ <li><a href="http://disqus.com/about/">About</a></li>
+ <li><a href="http://docs.disqus.com/">Help</a></li>
+ <li><a href="http://blog.disqus.com/">Blog</a></li>
+ </ul>
+ </div>
+ </div>
+ <div id="local-nav">
+ <div class="container">
+ <a href="http://code.disqus.com/" id="logo"><img src="http://mediacdn.disqus.com/1291160803/img/disqus-logo.png" alt="DISQUS" title="Disqus - Discover your community"/> CODE</a>
+
+ <ul id="account-nav">
+ <li><a href="/">Blog</a></li>
+ <li class="selected"><a href="/code/">Open Source</a></li>
+ <li><a href="/technology/">Technology</a></li>
+ <li><a href="http://lanyrd.com/search/?q=disqus">Presentations</a></li>
+ </ul>
+
+ </div>
+ </div>
+ </div>
+
+ <div id="content">
+ <div class="container">
+ <div id="code" class="box clearfix">
+ <div class="secondary">
+ <ul id="sidebar">
+ <li class="module">
+ <h3>The Engineers</h3>
+ <ul class="people">
+
+ <li>
+ <a href="http://disqus.com/Jason/" class="profile-launcher" data-profile-username="Jason" title="Jason">
+ <img src="http://disqus.com/api/users/avatars/Jason.jpg" alt="">
+ <cite>Jason</cite>
+ </a>
+ </li>
+
+ <li>
+ <a href="http://disqus.com/antonkovalyov/" class="profile-launcher" data-profile-username="antonkovalyov" title="antonkovalyov">
+ <img src="http://disqus.com/api/users/avatars/antonkovalyov.jpg" alt="">
+ <cite>antonkovalyov</cite>
+ </a>
+ </li>
+
+ <li>
+ <a href="http://disqus.com/bretthoerner/" class="profile-launcher" data-profile-username="bretthoerner" title="bretthoerner">
+ <img src="http://disqus.com/api/users/avatars/bretthoerner.jpg" alt="">
+ <cite>bretthoerner</cite>
+ </a>
+ </li>
+
+ <li>
+ <a href="http://disqus.com/benvinegar/" class="profile-launcher" data-profile-username="benvinegar" title="benvinegar">
+ <img src="http://disqus.com/api/users/avatars/benvinegar.jpg" alt="">
+ <cite>benvinegar</cite>
+ </a>
+ </li>
+
+ <li>
+ <a href="http://disqus.com/zeeg/" class="profile-launcher" data-profile-username="zeeg" title="zeeg">
+ <img src="http://disqus.com/api/users/avatars/zeeg.jpg" alt="">
+ <cite>zeeg</cite>
+ </a>
+ </li>
+
+ <li>
+ <a href="http://disqus.com/dz/" class="profile-launcher" data-profile-username="dz" title="dz">
+ <img src="http://disqus.com/api/users/avatars/dz.jpg" alt="">
+ <cite>dz</cite>
+ </a>
+ </li>
+
+ <li>
+ <a href="http://disqus.com/sugarc0de/" class="profile-launcher" data-profile-username="sugarc0de" title="sugarc0de">
+ <img src="http://disqus.com/api/users/avatars/sugarc0de.jpg" alt="">
+ <cite>sugarc0de</cite>
+ </a>
+ </li>
+
+ <li>
+ <a href="http://disqus.com/gjcourt/" class="profile-launcher" data-profile-username="gjcourt" title="gjcourt">
+ <img src="http://disqus.com/api/users/avatars/gjcourt.jpg" alt="">
+ <cite>gjcourt</cite>
+ </a>
+ </li>
+
+ </ul>
+ </li>
+ <li class="module">
+ <h3>Open Source <a href="/code/" class="button small xtrasmall">More</a></h3>
+ <ul class="projects">
+
+ <li>
+ <h3><a href="/code/projects/overseer.html">Overseer</a></h3>
+ <div class="about">
+ <p>Simple status board</p>
+ </div>
+ </li>
+
+ <li>
+ <h3><a href="/code/projects/sentry.html">Sentry</a></h3>
+ <div class="about">
+ <p>Real-time exception tracking</p>
+ </div>
+ </li>
+
+ <li>
+ <h3><a href="/code/projects/nexus.html">Nexus</a></h3>
+ <div class="about">
+ <p>Extensible administration</p>
+ </div>
+ </li>
+
+ <li>
+ <h3><a href="/code/projects/gargoyle.html">Gargoyle</a></h3>
+ <div class="about">
+ <p>Feature switches</p>
+ </div>
+ </li>
+
+ </ul>
+ </li>
+ <li class="banner">
+ <a href='https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/Array' title='JavaScript Array length'><img src='http://static.jsconf.us/promotejsh.gif' height='150' width='180' alt='JavaScript Array length'/></a>
+ </li>
+ </ul>
+ </div>
+ <div class="primary">
+ <div class="main" id="project">
+ <h1>Overseer</h1>
+
+ <p>Overseer is a simple status board app written in Django.</p>
+
+ <p>
+
+ <a class="button small xtrasmall" href="https://github.com/disqus/overseer">GitHub</a>
+
+
+ <a class="button small xtrasmall" href="http://pypi.python.org/pypi/overseer">PyPi</a>
+
+
+ <a class="button small xtrasmall" href="https://github.com/disqus/overseer/issues">Issue Tracker</a>
+
+
+ <a class="button small xtrasmall" href="https://groups.google.com/forum/#!forum/disqus-opensource">Google Group</a>
+
+ </p>
+
+ <img src="http://f.cl.ly/items/2y3J0h0w2M3h3E322A1V/Screen%20shot%202011-01-21%20at%202.19.52%20PM.png"/>
+
+ <div id="disqus_thread"></div>
+ <script type="text/javascript">
+ /**
+ * var disqus_identifier; [Optional but recommended: Define a unique identifier (e.g. post id or slug) for this thread]
+ */
+ var disqus_identifier = 'project-Overseer';
+ (function() {
+ var dsq = document.createElement('script'); dsq.type = 'text/javascript'; dsq.async = true;
+ dsq.src = 'http://disqus.disqus.com/embed.js';
+ (document.getElementsByTagName('head')[0] || document.getElementsByTagName('body')[0]).appendChild(dsq);
+ })();
+ </script>
+ <noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript=davidcramer">comments powered by Disqus.</a></noscript>
+ <a href="http://disqus.com" class="dsq-brlink">blog comments powered by <span class="logo-disqus">Disqus</span></a>
+
+</div>
+ </div>
+ </div>
+ </div>
+ </div>
+
+ <div id="footer">
+ <div class="container">
+ <p class="copyright">Copyright 2007-2010 &middot; <a href="/"class="disqus">DISQUS</a></p>
+ <ul>
+ <li><a href="http://disqus.com/about/">About</a></li>
+ <li><a href="http://disqus.com/features/">Features</a></li>
+ <li><a href="http://disqus.com/explore/">Explore</a></li>
+ <li><a href="http://disqus.com/addons/">Add-ons</a></li>
+ <li><a href="http://docs.disqus.com">Help</a></li>
+ <li><a href="http://blog.disqus.com">Blog</a></li>
+ <li><a href="http://disqus.com/api/">API</a></li>
+ <li><a href="http://disqus.com/jobs/">Jobs</a></li>
+ <li><a href="http://docs.disqus.com/kb/terms-and-policies/">Terms and Policies</a></li>
+ </ul>
+ </div>
+ </div>
+ <!-- Start Quantcast tag -->
+ <script type="text/javascript">
+ _qoptions={
+ qacct:"p-94WKwgUwZHlfo"
+ };
+ </script>
+ <script type="text/javascript" src="http://edge.quantserve.com/quant.js"></script>
+ <noscript>
+ <img src="http://pixel.quantserve.com/pixel/p-94WKwgUwZHlfo.gif" style="display: none;" border="0" height="1" width="1" alt="Quantcast"/>
+ </noscript>
+ <!-- End Quantcast tag -->
+ <script type="text/javascript">
+ var gaJsHost = (("https:" == document.location.protocol) ? "https://ssl." : "http://www.");
+ document.write(unescape("%3Cscript src='" + gaJsHost + "google-analytics.com/ga.js' type='text/javascript'%3E%3C/script%3E"));
+ </script>
+ <script type="text/javascript">
+ try {
+ var pageTracker = _gat._getTracker("UA-1410476-6");
+ pageTracker._trackPageview();
+ } catch(err) {}</script>
+</body>
+</html>
View
@@ -0,0 +1,17 @@
+import os.path
+import unittest2
+
+from decruft import Document
+
+fixture_path = os.path.join(os.path.dirname(__file__), 'fixtures')
+
+class DecruftTestCase(unittest2.TestCase):
+ def test_disqus_noscript_tag(self):
+ content = open(os.path.join(fixture_path, 'disqus.html')).read()
+ summary = Document(content).summary().encode('utf-8','ignore')
+ self.assertNotIn('<noscript>Please enable JavaScript to view the <a href="http://disqus.com/?ref_noscript=davidcramer">comments powered by Disqus.</a></noscript>', summary)
+
+ def test_comments_powered_by_disqus(self):
+ content = open(os.path.join(fixture_path, 'disqus.html')).read()
+ summary = Document(content).summary().encode('utf-8','ignore')
+ self.assertNotIn('<a href="http://disqus.com" class="dsq-brlink">blog comments powered by <span class="logo-disqus">Disqus</span></a>', summary)

0 comments on commit 8898499

Please sign in to comment.