Skip to content

Commit

Permalink
enabled readabilitySAX-table to find further pages
Browse files Browse the repository at this point in the history
  • Loading branch information
fb55 committed Oct 15, 2011
1 parent 700d564 commit 841cd28
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 16 deletions.
17 changes: 17 additions & 0 deletions readabilitySAX/node_url_module.js

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

34 changes: 18 additions & 16 deletions readabilitySAX/readabilitySAX.xml
Expand Up @@ -14,50 +14,49 @@
<key id="html" type="xs:string" paramType="variable"/>
<key id="url" type="xs:string" paramType="variable"/>
<key id="type" type="xs:string" paramType="variable" default="html" />
<key id="expand_url" type="xs:string" paramType="variable" default="t" />
<key id="linksToSkip" type="xs:string" paramType="variable" default="{}" />
</inputs>
<execute><![CDATA[
// Fallback for engines that lack Object.create, adapted from MDN:
// https://developer.mozilla.org/en/JavaScript/Reference/Global_Objects/Object/create#Polyfill
// Only the single-argument form is supported: the result is a fresh object
// whose prototype is the value passed in. Passing a second argument throws.
if (!Object.create) {
  Object.create = function (proto) {
    var Surrogate = function () {};
    if (arguments.length > 1) {
      throw new Error('Object.create implementation only accepts the first parameter.');
    }
    // Instances of the throwaway constructor inherit from `proto`.
    Surrogate.prototype = proto;
    return new Surrogate();
  };
}
// Fallback for engines without a native String.prototype.trim.
// Regex technique from http://blog.stevenlevithan.com/archives/faster-trim-javascript
if (!String.prototype.trim) {
  // Leading run of whitespace / trailing run of whitespace.
  var trimBeginRegexp = /^\s\s*/,
      trimEndRegexp = /\s\s*$/;
  String.prototype.trim = function () {
    // Coerce first so the method also works when `this` is a String object.
    var text = String(this);
    var withoutLeading = text.replace(trimBeginRegexp, '');
    return withoutLeading.replace(trimEndRegexp, '');
  };
}
// Pull in the helper scripts used by the rest of this execute block.
// Each y.include call is given the URL of a remote script
// (presumably fetched and evaluated by the YQL runtime — confirm against YQL docs).
y.include("https://raw.github.com/FB55/yql-tables/master/readabilitySAX/E4XasSAX.js");
y.include("https://raw.github.com/FB55/yql-tables/master/readabilitySAX/node_url_module.js");
y.include("https://raw.github.com/FB55/readabilitySAX/master/readabilitySAX.js");
// expand_url is a table input that defaults to "t"; unless the caller overrides it,
// the input url is replaced by the result of getLongURL(url).
// NOTE(review): getLongURL is not defined in this file — presumably provided by long_url.js,
// which is only included on this path; verify.
if(expand_url === "t"){
y.include("https://raw.github.com/FB55/yql-tables/master/longurl/long_url.js");
url = getLongURL(url);
}
var doc;
if(typeof html === "undefined" || !html)
doc = y.tidy(y.rest(url).get().response );
doc = y.tidy(y.rest(url).get().response);
else
doc = y.tidy(html);
linksToSkip = JSON.parse(linksToSkip);
var skipLevel = 0;
var contentLength = 0;
var p, r;
while(contentLength < 250 && skipLevel < 4){
p = {};
r = new readability.process(p,{
r = readability.process(p,{
log:function(){y.log.apply(y,arguments)},
skipLevel: skipLevel,
pageURL: url
linksToSkip: linksToSkip,
pageURL: url,
url: node_url_lib
});
saxParser(doc, p);
Expand All @@ -67,7 +66,10 @@
}
y.log("got article");
response.object = r.getArticle(type);
doc = r.getArticle(type);
doc.url = url;
response.object = doc;
]]>
</execute>
</select>
Expand Down

0 comments on commit 841cd28

Please sign in to comment.