Permalink
Browse files

Stripping all class names from article content

  • Loading branch information...
chetan51 committed Jan 12, 2011
1 parent 5614744 commit d5574b233139b2e3a3938462f2e3c7004924c6b6
Showing with 26 additions and 0 deletions.
  1. +26 −0 lib/readability.js
View
@@ -2129,6 +2129,31 @@ function removeReadabilityArtifacts() {
}
}
/**
 * Recursively blank out the `className` of a node and of every element
 * beneath it.
 *
 * @param {Node} [e] Root of the subtree to clean. When omitted (or falsy)
 *     the global `document` is used, if one is available.
 * @returns {undefined} Nothing; the nodes are modified in place.
 */
function removeClassNames(e) {
    // Resolve the root first and bail out *before* touching any of its
    // properties, so a missing node/document is a no-op rather than a crash.
    var root = e || (typeof document !== "undefined" ? document : null);
    var child;

    if (!root) {
        return;
    }

    // Remove any class names on the root itself, if it has any.
    if (root.className) {
        root.className = "";
    }

    // Walk the direct children; only element nodes (nodeType 1) are
    // descended into. Each child's own className is cleared by the
    // recursive call, so no separate per-child reset is needed here.
    for (child = root.firstChild; child; child = child.nextSibling) {
        if (child.nodeType === 1) {
            removeClassNames(child);
        }
    }
}
+
function start(w, cb) {
window = w;
document = w.document;
@@ -2147,6 +2172,7 @@ function start(w, cb) {
MyProfiler.report();
removeReadabilityArtifacts();
+ removeClassNames();
//dbg('[Readability] done');
cb(document.body.innerHTML);

0 comments on commit d5574b2

Please sign in to comment.