Skip to content
Browse files

refactoring

  • Loading branch information...
1 parent 6536e9f commit 6ad453285a7fa088754db3d100c317698683efad @tarao tarao committed Sep 4, 2009
Showing with 644 additions and 1,340 deletions.
  1. +1 −0 .gitignore
  2. +8 −0 Makefile
  3. +22 −303 lib/extract-content.js
  4. +308 −0 lib/lib.js
  5. +7 −179 lib/scoring-words.js
  6. +112 −392 sketch/extract-content.test.js
  7. +186 −466 sketch/suggest-tag.test.js
View
1 .gitignore
@@ -0,0 +1 @@
+extract-content-allinone.js
View
8 Makefile
@@ -0,0 +1,8 @@
+# Build extract-content-allinone.js by concatenating the library sources.
+# lib/lib.js goes first: the other two files read ExtractContentJS.Lib as
+# soon as they are evaluated.
+.PHONY: all clean
+
+all:
+	cat lib/lib.js \
+	    lib/extract-content.js \
+	    lib/scoring-words.js \
+	    > extract-content-allinone.js
+
+# -f keeps `make clean` from failing when the bundle was never built.
+clean:
+	rm -f extract-content-allinone.js
View
325 lib/extract-content.js
@@ -1,302 +1,11 @@
-if (typeof WWW == 'undefined') {
- var WWW = {};
+if (typeof ExtractContentJS == 'undefined') {
+ var ExtractContentJS = {};
}
(function(ns) {
- var Util = {
- inherit: function(child,parent) {
- var obj = child || {};
- for (var prop in parent) {
- if (typeof obj[prop] == 'undefined') {
- obj[prop] = parent[prop];
- }
- }
- return obj;
- },
- countMatch: function(text, regex) {
- return text.split(regex).length - 1;
- // var n=0;
- // for (var i=0;;) {
- // i = text.search(regex);
- // if (i < 0) break;
- // n++;
- // text = text.substr(i+1);
- // }
- // return n;
- },
- dump: function(obj) {
- if (typeof obj == 'undefined') return 'undefined';
- if (typeof obj == 'string') return '"' + obj + '"';
- if (typeof obj != 'object') return ''+obj;
- if (obj === null) return 'null';
- if (obj instanceof Array) {
- return '['
- + obj.map(function(v){return 'obj'/*Util.dump(v)*/;}).join(',')
- + ']';
- } else {
- var arr = [];
- for (var prop in obj) {
- arr.push(prop + ':' + 'obj'/*Util.dump(obj[prop])*/);
- }
- return '{' + arr.join(',') + '}';
- }
- }
- };
-
- var A = {
- indexOf: Array.indexOf || function(self, elt/*, from*/) {
- var argi = 2;
- var len = self.length;
- var from = Number(arguments[argi++]) || 0;
- from = (from < 0) ? Math.ceil(from) : Math.floor(from);
- if (from < 0) from += len;
- for (; from < len; from++) {
- if (from in self && self[from] === elt) return from;
- }
- return -1;
- },
- filter: Array.filter || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != "function") {
- throw new TypeError('A.filter: not a function');
- }
- var rv = new Array();
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self) {
- var val = self[i]; // in case fun mutates this
- if (fun.call(thisp, val, i, self)) rv.push(val);
- }
- }
- return rv;
- },
- forEach: Array.forEach || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.forEach: not a function');
- }
- var thisp = arguments[argi++];
- for (var i=0; i < len; i++) {
- if (i in self) fun.call(thisp, self[i], i, self);
- }
- },
- every: Array.every || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.every: not a function');
- }
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self &&
- !fun.call(thisp, self[i], i, self)) {
- return false;
- }
- }
- return true;
- },
- map: Array.map || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.map: not a function');
- }
- var rv = new Array(len);
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self) {
- rv[i] = fun.call(thisp, self[i], i, self);
- }
- }
- return rv;
- },
- some: Array.some || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != "function") {
- throw new TypeError('A.some: not a function');
- }
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self &&
- fun.call(thisp, self[i], i, self)) {
- return true;
- }
- }
- return false;
- },
- reduce: Array.reduce || function(self, fun/*, initial*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw TypeError('A.reduce: not a function ');
- }
- var i = 0;
- var prev;
- if (arguments.length > argi) {
- var rv = arguments[argi++];
- } else {
- do {
- if (i in self) {
- rv = self[i++];
- break;
- }
- if (++i >= len) {
- throw new TypeError('A.reduce: empty array');
- }
- } while (true);
- }
- for (; i < len; i++) {
- if (i in self) rv = fun.call(null, rv, self[i], i, self);
- }
- return rv;
- },
- zip: function(self) {
- if (self[0] instanceof Array) {
- var l = self[0].length;
- var len = self.length;
- var z = new Array(l);
- for (var i=0; i < l; i++) {
- z[i] = [];
- for (var j=0; j < len; j++) {
- z[i].push(self[j][i]);
- }
- }
- return z;
- }
- return [];
- },
- first: function(self) {
- return self ? self[0] : null;
- },
- last: function(self) {
- return self ? self[self.length-1] : null;
- },
- push: function(self, other) {
- return Array.prototype.push.apply(self, other);
- }
- };
-
- var DOM = {
- getElementStyle: function(elem, prop) {
- var style = elem.style ? elem.style[prop] : null;
- if (!style) {
- var dv = document.defaultView;
- if (dv && dv.getComputedStyle) {
- try {
- var styles = dv.getComputedStyle(elem, null);
- } catch(e) {
- return null;
- }
- prop = prop.replace(/([A-Z])/g, '-$1').toLowerCase();
- style = styles ? styles.getPropertyValue(prop) : null;
- } else if (elem.currentStyle) {
- style = elem.currentStyle[prop];
- }
- }
- return style;
- },
- text: function(node) {
- if (typeof node.textContent != 'undefined') {
- return node.textContent;
- } else if (node.nodeName == '#text') {
- return node.nodeValue;
- } else if (typeof node.innerText != 'undefined') {
- return node.innerText; // IE
- }
- },
- ancestors: function(e) {
- var body = e.ownerDocument.body;
- var r = [];
- var it = e;
- while (it != body) {
- r.push(it);
- it = it.parentNode;
- }
- r.push(body);
- return r; // [e .. document.body]
- },
- commonAncestor: function(e1, e2) {
- var a1 = DOM.ancestors(e1).reverse();
- var a2 = DOM.ancestors(e2).reverse();
- var r = null;
- for (var i=0; a1[i] && a2[i] && a1[i] == a2[i]; i++) {
- r = a1[i];
- }
- return r;
- },
- countMatchTagAttr: function(node, tag, attr, regexs) {
- var test = function(v){ return v.test(node[attr]); };
- if ((node.tagName||'').toLowerCase()==tag && A.some(regexs,test)) {
- return 1;
- }
- var n=0;
- var children = node.childNodes;
- for (var i=0, len=children.length; i < len; i++) {
- n += DOM.countMatchTagAttr(children[i], tag, attr, regexs);
- }
- return n;
- },
- matchTag: function(node, pat) {
- return A.some(pat, function(v){
- if (typeof v == 'string') {
- return v == (node.tagName||'').toLowerCase();
- } else if (v instanceof Array) {
- return v[0] == (node.tagName||'').toLowerCase()
- && DOM.matchAttr(node, v[1]);
- } else {
- return false;
- }
- });
- },
- matchAttr: function(node, pat) {
- var test = function(pat, val) {
- if (typeof pat == 'string') {
- return pat == val;
- } else if (pat instanceof RegExp) {
- return pat.test(val);
- } else if (pat instanceof Array) {
- return A.some(pat,function(v){return test(v,val);});
- } else if (pat instanceof Object) {
- for (var prop in pat) {
- var n = node[prop];
- if (n && DOM.matchAttr(n, pat[prop])) {
- return true;
- }
- }
- }
- return false;
- };
- for (var prop in pat) {
- var attr = node[prop];
- var ar = pat[prop];
- if (attr) {
- return test(ar, attr);
- }
- }
- return false;
- },
- matchStyle: function(node, pat) {
- var test = function(pat, val) {
- if (typeof pat == 'string') {
- return pat == val;
- } else if (pat instanceof RegExp) {
- return pat.test(val);
- } else if (pat instanceof Array) {
- return A.some(pat,function(v){return test(v,val);});
- }
- return false;
- };
- for (var prop in pat) {
- if (test(pat[prop], DOM.getElementStyle(node, prop))) {
- return true;
- }
- }
- return false;
- }
- };
+ var Util = ns.Lib.Util;
+ var A = ns.Lib.A;
+ var DOM = ns.Lib.DOM;
var Leaf = Util.inherit(function(node, depth, inside) {
var leaf = { node: node, depth: depth, inside: inside };
@@ -344,15 +53,22 @@ if (typeof WWW == 'undefined') {
self._node = Leaf.commonAncestor.apply(null, self._content);
return self._node;
};
- self.asText = function() {
- if (self._text) return self._text;
+ self.asTextFragment = function() {
+ if (self._textFragment) return self._textFragment;
if (self._content.length < 1) return '';
- self._text = A.reduce(self._content, function(prev,curr) {
+ self._textFragment = A.reduce(self._content, function(prev,curr) {
var s = DOM.text(curr.node);
s = s.replace(/^\s+/g,'').replace(/\s+$/g,'');
s = s.replace(/\s+/g,' ');
return prev + s;
}, '');
+ return self._textFragment;
+ };
+ // String form of the content: the text of the covering node returned by
+ // asNode(), cached in self._text once it is non-empty.
+ self.toString = function() {
+     if (self._text) return self._text;
+     // covering node
+     var node = self.asNode();
+     // Go through DOM.text (as asTextFragment does) so engines without
+     // textContent fall back to nodeValue/innerText; a missing node or a
+     // null/undefined text becomes '' so callers can always read .length.
+     self._text = (node && DOM.text(node)) || '';
      return self._text;
  };
@@ -384,7 +100,8 @@ if (typeof WWW == 'undefined') {
self.extract = function(d) {
var url = d.location.href;
var res = { title: d.title };
- for (var i=0, len=self.handler.length; i < len; i++) {
+ var len = self.handler.length;
+ for (var i=0; i < len; i++) {
var content = self.handler[i].extract(d, url, res);
if (!content) continue;
@@ -394,7 +111,7 @@ if (typeof WWW == 'undefined') {
}
content = new Content(content);
- if (!content.asText().length) continue;
+ if (!content.toString().length) continue;
res.content = content;
res.isSuccess = true;
res.engine = res.engine || self.handler[i];
@@ -611,7 +328,8 @@ if (typeof WWW == 'undefined') {
var blocks = Block.split(d.body);
var last;
- for (var i=0, len=blocks.length; i < len; i++) {
+ var len = blocks.length;
+ for (var i=0; i < len; i++) {
var block = blocks[i];
if (last) {
continuous /= self.opt.factor.continuous;
@@ -649,4 +367,5 @@ if (typeof WWW == 'undefined') {
return self;
};
-})(WWW);
+})(ExtractContentJS);
+
View
308 lib/lib.js
@@ -0,0 +1,308 @@
+// Set up the ExtractContentJS namespace and its Lib sub-namespace, keeping
+// whichever of them already exists instead of overwriting it.
+if (typeof ExtractContentJS === 'undefined') var ExtractContentJS = {};
+if (typeof ExtractContentJS.Lib === 'undefined') ExtractContentJS.Lib = {};
+
+/**
+ * General-purpose helpers; extract-content.js and scoring-words.js both pick
+ * this object up as ns.Lib.Util.
+ */
+ExtractContentJS.Lib.Util = {
+    /**
+     * Copies onto `child` every enumerable property of `parent` (inherited
+     * ones included) whose current value on `child` is undefined.
+     *
+     * @param {Object|Function} child   receiver; a fresh object is used when falsy
+     * @param {Object}          parent  source of the default properties
+     * @returns {Object|Function} `child`, or the fresh object
+     */
+    inherit: function(child, parent) {
+        var obj = child || {};
+        for (var prop in parent) {
+            if (typeof obj[prop] == 'undefined') {
+                obj[prop] = parent[prop];
+            }
+        }
+        return obj;
+    },
+    /**
+     * Counts occurrences of `regex` in `text` as the number of pieces
+     * `text.split(regex)` yields, minus one.
+     *
+     * NOTE(review): String.prototype.split also emits captured groups, so a
+     * RegExp containing capturing groups inflates the result - confirm that
+     * no caller passes one.
+     *
+     * @param {string}        text
+     * @param {string|RegExp} regex  separator to count
+     * @returns {number}
+     */
+    countMatch: function(text, regex) {
+        return text.split(regex).length - 1;
+    },
+    /**
+     * Shallow debug rendering of a value. Strings are quoted, other
+     * primitives and functions are stringified, and arrays/objects list their
+     * slots/keys with the placeholder 'obj' in place of each nested value
+     * (nested values are deliberately not expanded).
+     *
+     * @param {*} obj
+     * @returns {string}
+     */
+    dump: function(obj) {
+        if (typeof obj == 'undefined') return 'undefined';
+        if (typeof obj == 'string') return '"' + obj + '"';
+        if (typeof obj != 'object') return '' + obj;
+        if (obj === null) return 'null';
+        var parts = [];
+        if (obj instanceof Array) {
+            // Plain loop instead of obj.map(): this library targets engines
+            // that may lack Array.prototype.map. Holes stay empty, exactly as
+            // map().join(',') rendered them.
+            for (var i = 0, len = obj.length; i < len; i++) {
+                parts.push(i in obj ? 'obj' : '');
+            }
+            return '[' + parts.join(',') + ']';
+        }
+        for (var prop in obj) {
+            parts.push(prop + ':' + 'obj');
+        }
+        return '{' + parts.join(',') + '}';
+    }
+};
+
+/**
+ * Array helpers that take the list as their first argument. Each iterator
+ * helper is the engine's static generic of the same name (e.g. Array.map)
+ * when that is defined, and otherwise the portable fallback written here.
+ * The fallbacks skip holes in sparse arrays.
+ */
+ExtractContentJS.Lib.A = (function() {
+    // Shared callback check; `message` is the text of the TypeError raised.
+    var assertCallable = function(fun, message) {
+        if (typeof fun != 'function') throw new TypeError(message);
+    };
+
+    var A = {};
+
+    // Index of the first element strictly equal to `elt` at or after `from`
+    // (default 0; a negative `from` counts back from the end), or -1.
+    A.indexOf = Array.indexOf || function(self, elt/*, from*/) {
+        var len = self.length;
+        var from = Number(arguments[2]) || 0;
+        from = (from < 0) ? Math.ceil(from) : Math.floor(from);
+        // Clamp at 0: without it -Infinity never advances (endless loop) and
+        // any from < -len scans indices that cannot exist.
+        if (from < 0) from = Math.max(from + len, 0);
+        for (; from < len; from++) {
+            if (from in self && self[from] === elt) return from;
+        }
+        return -1;
+    };
+
+    // New array holding the elements for which fun(value, index, self),
+    // called with `thisp` as `this`, returns a truthy value.
+    A.filter = Array.filter || function(self, fun/*, thisp*/) {
+        var len = self.length;
+        assertCallable(fun, 'A.filter: not a function');
+        var rv = [];
+        var thisp = arguments[2];
+        for (var i = 0; i < len; i++) {
+            if (i in self) {
+                var val = self[i]; // in case fun mutates this
+                if (fun.call(thisp, val, i, self)) rv.push(val);
+            }
+        }
+        return rv;
+    };
+
+    // Calls fun(value, index, self) for every present element.
+    A.forEach = Array.forEach || function(self, fun/*, thisp*/) {
+        var len = self.length;
+        assertCallable(fun, 'A.forEach: not a function');
+        var thisp = arguments[2];
+        for (var i = 0; i < len; i++) {
+            if (i in self) fun.call(thisp, self[i], i, self);
+        }
+    };
+
+    // True unless fun returns a falsy value for some present element.
+    A.every = Array.every || function(self, fun/*, thisp*/) {
+        var len = self.length;
+        assertCallable(fun, 'A.every: not a function');
+        var thisp = arguments[2];
+        for (var i = 0; i < len; i++) {
+            if (i in self && !fun.call(thisp, self[i], i, self)) {
+                return false;
+            }
+        }
+        return true;
+    };
+
+    // New array of the same length holding fun's result for each present
+    // element; holes stay holes.
+    A.map = Array.map || function(self, fun/*, thisp*/) {
+        var len = self.length;
+        assertCallable(fun, 'A.map: not a function');
+        var rv = new Array(len);
+        var thisp = arguments[2];
+        for (var i = 0; i < len; i++) {
+            if (i in self) {
+                rv[i] = fun.call(thisp, self[i], i, self);
+            }
+        }
+        return rv;
+    };
+
+    // True as soon as fun returns a truthy value for a present element.
+    A.some = Array.some || function(self, fun/*, thisp*/) {
+        var len = self.length;
+        assertCallable(fun, 'A.some: not a function');
+        var thisp = arguments[2];
+        for (var i = 0; i < len; i++) {
+            if (i in self && fun.call(thisp, self[i], i, self)) {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    // Left fold: rv = fun(rv, value, index, self) over the present elements.
+    // Without an `initial` argument the first present element seeds the
+    // accumulator, and a list with no present element raises TypeError.
+    A.reduce = Array.reduce || function(self, fun/*, initial*/) {
+        var len = self.length;
+        assertCallable(fun, 'A.reduce: not a function ');
+        var i = 0;
+        var rv;
+        if (arguments.length > 2) {
+            rv = arguments[2];
+        } else {
+            do {
+                if (i in self) {
+                    rv = self[i++];
+                    break;
+                }
+                if (++i >= len) {
+                    throw new TypeError('A.reduce: empty array');
+                }
+            } while (true);
+        }
+        for (; i < len; i++) {
+            if (i in self) rv = fun.call(null, rv, self[i], i, self);
+        }
+        return rv;
+    };
+
+    // Transposes an array of arrays: result[i][j] = self[j][i]. The number
+    // of rows produced is the length of self[0]; returns [] when self[0] is
+    // not an array.
+    A.zip = function(self) {
+        if (self[0] instanceof Array) {
+            var l = self[0].length;
+            var len = self.length;
+            var z = new Array(l);
+            for (var i = 0; i < l; i++) {
+                z[i] = [];
+                for (var j = 0; j < len; j++) {
+                    z[i].push(self[j][i]);
+                }
+            }
+            return z;
+        }
+        return [];
+    };
+
+    // First element, or null when `self` itself is falsy.
+    A.first = function(self) {
+        return self ? self[0] : null;
+    };
+
+    // Last element, or null when `self` itself is falsy.
+    A.last = function(self) {
+        return self ? self[self.length-1] : null;
+    };
+
+    // Appends every element of `other` to `self` in place and returns the
+    // value of Array.prototype.push (the new length).
+    A.push = function(self, other) {
+        return Array.prototype.push.apply(self, other);
+    };
+
+    return A;
+})();
+
+/**
+ * DOM helpers: style lookup, text extraction, ancestor walks and
+ * tag/attribute/style pattern matching.
+ */
+ExtractContentJS.Lib.DOM = (function() {
+    var A = ExtractContentJS.Lib.A;
+    var DOM = {};
+
+    // Value of style property `prop` (camelCase, e.g. 'fontSize') for
+    // `elem`: the inline style when it is set, otherwise the computed style,
+    // otherwise elem.currentStyle. Returns null when getComputedStyle throws.
+    DOM.getElementStyle = function(elem, prop) {
+        var style = elem.style ? elem.style[prop] : null;
+        if (!style) {
+            // Prefer the view of the element's own document so nodes that
+            // belong to another document resolve against the right window;
+            // fall back to the global document's view as before.
+            var own = elem.ownerDocument;
+            var dv = (own && own.defaultView) || document.defaultView;
+            if (dv && dv.getComputedStyle) {
+                var styles;
+                try {
+                    styles = dv.getComputedStyle(elem, null);
+                } catch(e) {
+                    return null;
+                }
+                // getPropertyValue expects the hyphenated CSS name.
+                prop = prop.replace(/([A-Z])/g, '-$1').toLowerCase();
+                style = styles ? styles.getPropertyValue(prop) : null;
+            } else if (elem.currentStyle) {
+                style = elem.currentStyle[prop];
+            }
+        }
+        return style;
+    };
+
+    // Text of `node`: textContent when the property is defined, else
+    // nodeValue for '#text' nodes, else innerText; null when none applies.
+    DOM.text = function(node) {
+        if (typeof node.textContent != 'undefined') {
+            return node.textContent;
+        } else if (node.nodeName == '#text') {
+            return node.nodeValue;
+        } else if (typeof node.innerText != 'undefined') {
+            return node.innerText; // IE
+        }
+        return null;
+    };
+
+    // Chain from `e` up to its document's body: [e .. document.body].
+    // For a node that is not inside the body the chain stops at the top of
+    // its tree and does not include the body.
+    DOM.ancestors = function(e) {
+        var body = e.ownerDocument.body;
+        var r = [];
+        var it = e;
+        // `it &&` stops the walk at the root; without it a node outside the
+        // body ran off the top of the tree and threw on null.parentNode.
+        while (it && it != body) {
+            r.push(it);
+            it = it.parentNode;
+        }
+        if (it == body) r.push(body);
+        return r;
+    };
+
+    // Deepest node present in the ancestor chains of both `e1` and `e2`,
+    // or null when the chains do not start from the same node.
+    DOM.commonAncestor = function(e1, e2) {
+        var a1 = DOM.ancestors(e1).reverse();
+        var a2 = DOM.ancestors(e2).reverse();
+        var r = null;
+        for (var i=0; a1[i] && a2[i] && a1[i] == a2[i]; i++) {
+            r = a1[i];
+        }
+        return r;
+    };
+
+    // Counts the nodes in the subtree of `node` whose tag name is `tag`
+    // (lower case) and whose `attr` property matches one of `regexs`.
+    // A matching node counts as 1 and its descendants are not searched.
+    DOM.countMatchTagAttr = function(node, tag, attr, regexs) {
+        var test = function(v){ return v.test(node[attr]); };
+        if ((node.tagName||'').toLowerCase()==tag && A.some(regexs,test)) {
+            return 1;
+        }
+        var n=0;
+        var children = node.childNodes;
+        for (var i=0, len=children.length; i < len; i++) {
+            n += DOM.countMatchTagAttr(children[i], tag, attr, regexs);
+        }
+        return n;
+    };
+
+    // True when `node` matches an entry of `pat`. An entry is either a
+    // lower-case tag name or a [tagName, attrPattern] pair, where
+    // attrPattern is checked with DOM.matchAttr.
+    DOM.matchTag = function(node, pat) {
+        return A.some(pat, function(v){
+            if (typeof v == 'string') {
+                return v == (node.tagName||'').toLowerCase();
+            } else if (v instanceof Array) {
+                return v[0] == (node.tagName||'').toLowerCase()
+                    && DOM.matchAttr(node, v[1]);
+            } else {
+                return false;
+            }
+        });
+    };
+
+    // Tests properties of `node` against `pat`, a map from property name to
+    // a string (equality), a RegExp, an array of alternatives, or a nested
+    // map.
+    // NOTE(review): the outer loop returns the verdict for the FIRST pattern
+    // key the node has a truthy value for and never looks at the remaining
+    // keys, whereas matchStyle accepts a match on any key - confirm which is
+    // intended.
+    // NOTE(review): the nested-map branch reads node[prop] rather than
+    // val[prop]; this looks unintended - verify against the callers.
+    DOM.matchAttr = function(node, pat) {
+        var test = function(pat, val) {
+            if (typeof pat == 'string') {
+                return pat == val;
+            } else if (pat instanceof RegExp) {
+                return pat.test(val);
+            } else if (pat instanceof Array) {
+                return A.some(pat,function(v){return test(v,val);});
+            } else if (pat instanceof Object) {
+                for (var prop in pat) {
+                    var n = node[prop];
+                    if (n && DOM.matchAttr(n, pat[prop])) {
+                        return true;
+                    }
+                }
+            }
+            return false;
+        };
+        for (var prop in pat) {
+            var attr = node[prop];
+            var ar = pat[prop];
+            if (attr) {
+                return test(ar, attr);
+            }
+        }
+        return false;
+    };
+
+    // True when, for at least one key of `pat`, the style value reported by
+    // DOM.getElementStyle(node, key) matches the pattern (a string for
+    // equality, a RegExp, or an array of alternatives).
+    DOM.matchStyle = function(node, pat) {
+        var test = function(pat, val) {
+            if (typeof pat == 'string') {
+                return pat == val;
+            } else if (pat instanceof RegExp) {
+                return pat.test(val);
+            } else if (pat instanceof Array) {
+                return A.some(pat,function(v){return test(v,val);});
+            }
+            return false;
+        };
+        for (var prop in pat) {
+            if (test(pat[prop], DOM.getElementStyle(node, prop))) {
+                return true;
+            }
+        }
+        return false;
+    };
+
+    return DOM;
+})();
+
View
186 lib/scoring-words.js
@@ -1,183 +1,10 @@
-if (typeof Scoring == 'undefined') {
- var Scoring = {};
+if (typeof ExtractContentJS == 'undefined') {
+ var ExtractContentJS = {};
}
(function(ns) {
- var Util = {
- inherit: function(child,parent) {
- var obj = child || {};
- for (var prop in parent) {
- if (typeof obj[prop] == 'undefined') {
- obj[prop] = parent[prop];
- }
- }
- return obj;
- },
- countMatch: function(text, regex) {
- return text.split(regex).length - 1;
- // var n=0;
- // for (var i=0;;) {
- // i = text.search(regex);
- // if (i < 0) break;
- // n++;
- // text = text.substr(i+1);
- // }
- // return n;
- },
- dump: function(obj) {
- if (typeof obj == 'undefined') return 'undefined';
- if (typeof obj == 'string') return '"' + obj + '"';
- if (typeof obj != 'object') return ''+obj;
- if (obj === null) return 'null';
- if (obj instanceof Array) {
- return '['
- + obj.map(function(v){return 'obj'/*Util.dump(v)*/;}).join(',')
- + ']';
- } else {
- var arr = [];
- for (var prop in obj) {
- arr.push(prop + ':' + 'obj'/*Util.dump(obj[prop])*/);
- }
- return '{' + arr.join(',') + '}';
- }
- }
- };
-
- var A = {
- indexOf: Array.indexOf || function(self, elt/*, from*/) {
- var argi = 2;
- var len = self.length;
- var from = Number(arguments[argi++]) || 0;
- from = (from < 0) ? Math.ceil(from) : Math.floor(from);
- if (from < 0) from += len;
- for (; from < len; from++) {
- if (from in self && self[from] === elt) return from;
- }
- return -1;
- },
- filter: Array.filter || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != "function") {
- throw new TypeError('A.filter: not a function');
- }
- var rv = new Array();
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self) {
- var val = self[i]; // in case fun mutates this
- if (fun.call(thisp, val, i, self)) rv.push(val);
- }
- }
- return rv;
- },
- forEach: Array.forEach || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.forEach: not a function');
- }
- var thisp = arguments[argi++];
- for (var i=0; i < len; i++) {
- if (i in self) fun.call(thisp, self[i], i, self);
- }
- },
- every: Array.every || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.every: not a function');
- }
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self &&
- !fun.call(thisp, self[i], i, self)) {
- return false;
- }
- }
- return true;
- },
- map: Array.map || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.map: not a function');
- }
- var rv = new Array(len);
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self) {
- rv[i] = fun.call(thisp, self[i], i, self);
- }
- }
- return rv;
- },
- some: Array.some || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != "function") {
- throw new TypeError('A.some: not a function');
- }
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self &&
- fun.call(thisp, self[i], i, self)) {
- return true;
- }
- }
- return false;
- },
- reduce: Array.reduce || function(self, fun/*, initial*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw TypeError('A.reduce: not a function ');
- }
- var i = 0;
- var prev;
- if (arguments.length > argi) {
- var rv = arguments[argi++];
- } else {
- do {
- if (i in self) {
- rv = self[i++];
- break;
- }
- if (++i >= len) {
- throw new TypeError('A.reduce: empty array');
- }
- } while (true);
- }
- for (; i < len; i++) {
- if (i in self) rv = fun.call(null, rv, self[i], i, self);
- }
- return rv;
- },
- zip: function(self) {
- if (self[0] instanceof Array) {
- var l = self[0].length;
- var len = self.length;
- var z = new Array(l);
- for (var i=0; i < l; i++) {
- z[i] = [];
- for (var j=0; j < len; j++) {
- z[i].push(self[j][i]);
- }
- }
- return z;
- }
- return [];
- },
- first: function(self) {
- return self ? self[0] : null;
- },
- last: function(self) {
- return self ? self[self.length-1] : null;
- },
- push: function(self, other) {
- return Array.prototype.push.apply(self, other);
- }
- };
+ var Util = ns.Lib.Util;
+ var A = ns.Lib.A;
ns.RelativeWords = function(/* engines */) {
var self = { engine: arguments[0] || [] };
@@ -233,7 +60,7 @@ if (typeof Scoring == 'undefined') {
var tf = 0;
var w = t.toLowerCase();
- tf += Util.countMatch(doc.content.asText().toLowerCase(), w);
+ tf += Util.countMatch((doc.content+'').toLowerCase(), w);
if (doc.title) {
tf += Util.countMatch(doc.title.toLowerCase(), w);
}
@@ -250,4 +77,5 @@ if (typeof Scoring == 'undefined') {
return self;
};
-})(Scoring);
+})(ExtractContentJS);
+
View
504 sketch/extract-content.test.js
@@ -1,4 +1,113 @@
-(function(url, list, callback) {
+(function() {
+ var url = 'http://labs.orezdnu.org/js/extract-content/'; // for test
+ var libs = [
+ [ 'lib.js', [
+ 'ExtractContentJS.Lib.Util',
+ 'ExtractContentJS.Lib.A',
+ 'ExtractContentJS.Lib.DOM'
+ ] ],
+ [ 'extract-content.js', 'ExtractContentJS.LayeredExtractor' ]
+ ];
+ // Callback handed to Libs#loadEach once the libraries are loaded. It
+ // installs extractContent() and doTest() on l.ExtractContentTest and runs
+ // doTest() at once when the `auto` setting is truthy.
+ // Settings read from l.ExtractContentTest: debug, only, asText,
+ // asTextFragment, auto.
+ var testFunc = function(l) {
+     var Util = ExtractContentJS.Lib.Util;
+     var A = ExtractContentJS.Lib.A;
+     var DOM = ExtractContentJS.Lib.DOM;
+
+     // Create the settings object on `l` itself: declaring a local variable
+     // here left l.ExtractContentTest undefined, so the next line threw.
+     if (typeof l.ExtractContentTest == 'undefined') {
+         l.ExtractContentTest = {};
+     }
+     var debug = l.ExtractContentTest.debug;
+
+     // Renders scored blocks as a nested list in document `d`: one item per
+     // block showing its score, containing one item per leaf node.
+     var renderBlocks = function(d, blocks) {
+         var div = d.createElement('div');
+         var ul = d.createElement('ul');
+         A.forEach(blocks, function(b) {
+             var li = d.createElement('li');
+             li.appendChild(d.createTextNode(b.score));
+             var ul2 = d.createElement('ul');
+             A.forEach(b.leaves, function(v){
+                 v = v.node;
+                 var s = v.tagName || DOM.text(v) || Util.dump(v);
+                 s = s.replace(/\s+/g, '');
+                 var li2 = d.createElement('li');
+                 s = v.nodeName + ': ' + (s.length ? s : '<empty>');
+                 li2.appendChild(d.createTextNode(s));
+                 ul2.appendChild(li2);
+             });
+             li.appendChild(ul2);
+             ul.appendChild(li);
+         });
+         div.appendChild(ul);
+         return div;
+     };
+
+     // Runs the extractor on document `d` and returns a node to display, or
+     // null when there is nothing to display (no body, or the extracted
+     // node is the body itself).
+     l.ExtractContentTest.extractContent = function(d) {
+         if (!d.body) return null;
+
+         if (l.ExtractContentTest.only == 'Heuristics') {
+             // test only Heuristics handler
+             // (namespace follows the WWW -> ExtractContentJS rename)
+             var handler =
+                 new ExtractContentJS.LayeredExtractor.Handler.Heuristics();
+             handler.extract(d);
+             return renderBlocks(
+                 d, handler.blocks || [ handler.content.asLeaves() ]);
+         }
+
+         /* TEST for layered handlers */
+
+         var ex = new ExtractContentJS.LayeredExtractor;
+//         ex.addHandler( ex.factory.getHandler('Description') );
+//         ex.addHandler( ex.factory.getHandler('Scraper'));
+//         ex.addHandler( ex.factory.getHandler('GoogleAdsence') );
+         ex.addHandler( ex.factory.getHandler('Heuristics') );
+         var res = ex.extract(d);
+
+         if (!res.isSuccess) {
+             return d.createTextNode('failed');
+         }
+         if (debug) {
+             return renderBlocks(
+                 d, res.engine.blocks || [ res.content.asLeaves() ]);
+         }
+         if (l.ExtractContentTest.asText) {
+             return d.createTextNode(res.content.toString());
+         }
+         if (l.ExtractContentTest.asTextFragment) {
+             return d.createTextNode(res.content.asTextFragment());
+         }
+         var node = res.content.asNode();
+         // The body cannot be re-inserted into itself; report that
+         // explicitly instead of falling off the end with undefined.
+         return (node && node != d.body) ? node.cloneNode(true) : null;
+     };
+
+     // Replaces the children of the current page's body with the
+     // extraction result; leaves the page untouched when there is none.
+     l.ExtractContentTest.doTest = function() {
+         var e = l.ExtractContentTest.extractContent(document);
+         if (!e) return; // appendChild(null) would throw
+         var b = document.body;
+         while (b.firstChild) {
+             b.removeChild(b.firstChild);
+         }
+         b.appendChild(e);
+     };
+
+     if (l.ExtractContentTest.auto) {
+         l.ExtractContentTest.doTest();
+     }
+ };
+
var A = {
filter: Array.filter || function(self, fun/*, thisp*/) {
var argi = 2;
@@ -98,394 +207,5 @@
return self;
};
- new Libs(url, null).loadEach(list, callback);
-})('http://labs.orezdnu.org/js/', [
- [ 'extract-content.js', 'WWW.LayeredExtractor' ]
-], function(l) {
- var Util = {
- inherit: function(child,parent) {
- var obj = child || {};
- for (var prop in parent) {
- if (typeof obj[prop] == 'undefined') {
- obj[prop] = parent[prop];
- }
- }
- return obj;
- },
- countMatch: function(text, regex) {
- return text.split(regex).length - 1;
- // var n=0;
- // for (var i=0;;) {
- // i = text.search(regex);
- // if (i < 0) break;
- // n++;
- // text = text.substr(i+1);
- // }
- // return n;
- },
- dump: function(obj) {
- if (typeof obj == 'undefined') return 'undefined';
- if (typeof obj == 'string') return '"' + obj + '"';
- if (typeof obj != 'object') return ''+obj;
- if (obj === null) return 'null';
- if (obj instanceof Array) {
- return '['
- + obj.map(function(v){return 'obj'/*Util.dump(v)*/;}).join(',')
- + ']';
- } else {
- var arr = [];
- for (var prop in obj) {
- arr.push(prop + ':' + 'obj'/*Util.dump(obj[prop])*/);
- }
- return '{' + arr.join(',') + '}';
- }
- }
- };
-
- var A = {
- indexOf: Array.indexOf || function(self, elt/*, from*/) {
- var argi = 2;
- var len = self.length;
- var from = Number(arguments[argi++]) || 0;
- from = (from < 0) ? Math.ceil(from) : Math.floor(from);
- if (from < 0) from += len;
- for (; from < len; from++) {
- if (from in self && self[from] === elt) return from;
- }
- return -1;
- },
- filter: Array.filter || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != "function") {
- throw new TypeError('A.filter: not a function');
- }
- var rv = new Array();
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self) {
- var val = self[i]; // in case fun mutates this
- if (fun.call(thisp, val, i, self)) rv.push(val);
- }
- }
- return rv;
- },
- forEach: Array.forEach || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.forEach: not a function');
- }
- var thisp = arguments[argi++];
- for (var i=0; i < len; i++) {
- if (i in self) fun.call(thisp, self[i], i, self);
- }
- },
- every: Array.every || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.every: not a function');
- }
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self &&
- !fun.call(thisp, self[i], i, self)) {
- return false;
- }
- }
- return true;
- },
- map: Array.map || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.map: not a function');
- }
- var rv = new Array(len);
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self) {
- rv[i] = fun.call(thisp, self[i], i, self);
- }
- }
- return rv;
- },
- some: Array.some || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != "function") {
- throw new TypeError('A.some: not a function');
- }
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self &&
- fun.call(thisp, self[i], i, self)) {
- return true;
- }
- }
- return false;
- },
- reduce: Array.reduce || function(self, fun/*, initial*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw TypeError('A.reduce: not a function ');
- }
- var i = 0;
- var prev;
- if (arguments.length > argi) {
- var rv = arguments[argi++];
- } else {
- do {
- if (i in self) {
- rv = self[i++];
- break;
- }
- if (++i >= len) {
- throw new TypeError('A.reduce: empty array');
- }
- } while (true);
- }
- for (; i < len; i++) {
- if (i in self) rv = fun.call(null, rv, self[i], i, self);
- }
- return rv;
- },
- zip: function(self) {
- if (self[0] instanceof Array) {
- var l = self[0].length;
- var len = self.length;
- var z = new Array(l);
- for (var i=0; i < l; i++) {
- z[i] = [];
- for (var j=0; j < len; j++) {
- z[i].push(self[j][i]);
- }
- }
- return z;
- }
- return [];
- },
- first: function(self) {
- return self ? self[0] : null;
- },
- last: function(self) {
- return self ? self[self.length-1] : null;
- },
- push: function(self, other) {
- return Array.prototype.push.apply(self, other);
- }
- };
-
- var DOM = {
- getElementStyle: function(elem, prop) {
- var style = elem.style ? elem.style[prop] : null;
- if (!style) {
- var dv = document.defaultView;
- if (dv && dv.getComputedStyle) {
- try {
- var styles = dv.getComputedStyle(elem, null);
- } catch(e) {
- return null;
- }
- prop = prop.replace(/([A-Z])/g, '-$1').toLowerCase();
- style = styles ? styles.getPropertyValue(prop) : null;
- } else if (elem.currentStyle) {
- style = elem.currentStyle[prop];
- }
- }
- return style;
- },
- text: function(node) {
- if (typeof node.textContent != 'undefined') {
- return node.textContent;
- } else if (node.nodeName == '#text') {
- return node.nodeValue;
- } else if (typeof node.innerText != 'undefined') {
- return node.innerText; // IE
- }
- },
- ancestors: function(e) {
- var body = e.ownerDocument.body;
- var r = [];
- var it = e;
- while (it != body) {
- r.push(it);
- it = it.parentNode;
- }
- r.push(body);
- return r; // [e .. document.body]
- },
- commonAncestor: function(e1, e2) {
- var a1 = DOM.ancestors(e1).reverse();
- var a2 = DOM.ancestors(e2).reverse();
- var r = null;
- for (var i=0; a1[i] && a2[i] && a1[i] == a2[i]; i++) {
- r = a1[i];
- }
- return r;
- },
- countMatchTagAttr: function(node, tag, attr, regexs) {
- var test = function(v){ return v.test(node[attr]); };
- if ((node.tagName||'').toLowerCase()==tag && A.some(regexs,test)) {
- return 1;
- }
- var n=0;
- var children = node.childNodes;
- for (var i=0, len=children.length; i < len; i++) {
- n += DOM.countMatchTagAttr(children[i], tag, attr, regexs);
- }
- return n;
- },
- matchTag: function(node, pat) {
- return A.some(pat, function(v){
- if (typeof v == 'string') {
- return v == (node.tagName||'').toLowerCase();
- } else if (v instanceof Array) {
- return v[0] == (node.tagName||'').toLowerCase()
- && DOM.matchAttr(node, v[1]);
- } else {
- return false;
- }
- });
- },
- matchAttr: function(node, pat) {
- var test = function(pat, val) {
- if (typeof pat == 'string') {
- return pat == val;
- } else if (pat instanceof RegExp) {
- return pat.test(val);
- } else if (pat instanceof Array) {
- return A.some(pat,function(v){return test(v,val);});
- } else if (pat instanceof Object) {
- for (var prop in pat) {
- var n = node[prop];
- if (n && DOM.matchAttr(n, pat[prop])) {
- return true;
- }
- }
- }
- return false;
- };
- for (var prop in pat) {
- var attr = node[prop];
- var ar = pat[prop];
- if (attr) {
- return test(ar, attr);
- }
- }
- return false;
- },
- matchStyle: function(node, pat) {
- var test = function(pat, val) {
- if (typeof pat == 'string') {
- return pat == val;
- } else if (pat instanceof RegExp) {
- return pat.test(val);
- } else if (pat instanceof Array) {
- return A.some(pat,function(v){return test(v,val);});
- }
- return false;
- };
- for (var prop in pat) {
- if (test(pat[prop], DOM.getElementStyle(node, prop))) {
- return true;
- }
- }
- return false;
- }
- };
-
- if (typeof l.extractContentTest == 'undefined') {
- var extractContentTest = {};
- }
- var debug = l.extractContentTest.debug;
-
- l.extractContentTest.extractContent = function(d) {
- if (!d.body) return null;
-
- if (l.extractContentTest.only == 'Heuristics') {
- // test only Heuristics handler
- var ex = new WWW.LayeredExtractor.Handler.Heuristics();
- ex.extract(d);
- var blocks = ex.blocks || [ ex.content.asLeaves() ];
- var div = d.createElement('div');
- var ul = d.createElement('ul');
- A.forEach(blocks, function(b) {
- var li = d.createElement('li');
- li.appendChild(d.createTextNode(b.score));
- var ul2 = d.createElement('ul');
- A.forEach(b.leaves, function(v){
- v = v.node;
- var s = v.tagName || DOM.text(v) || Util.dump(v);
- s = s.replace(/\s+/g, '');
- var li2 = d.createElement('li');
- s = v.nodeName + ': ' + (s.length ? s : '<empty>');
- li2.appendChild(d.createTextNode(s));
- ul2.appendChild(li2);
- });
- li.appendChild(ul2);
- ul.appendChild(li);
- });
- div.appendChild(ul);
- return div;
- }
-
- /* TEST for layred handlers */
-
- var ex = new WWW.LayeredExtractor;
-// ex.addHandler( ex.factory.getHandler('Description') );
-// ex.addHandler( ex.factory.getHandler('Scraper'));
-// ex.addHandler( ex.factory.getHandler('GoogleAdsence') );
- ex.addHandler( ex.factory.getHandler('Heuristics') );
- var res = ex.extract(d);
-
- if (!res.isSuccess) {
- return d.createTextNode('failed');
- } else if (!debug) {
- if (l.extractContentTest.asText) {
- return d.createTextNode(res.content.asText());
- }
- var node = res.content.asNode();
- if (node != d.body) {
- return node.cloneNode(true);
- }
- } else { // debug
- var blocks = res.engine.blocks || [ res.content.asLeaves() ];
- var div = d.createElement('div');
- var ul = d.createElement('ul');
- A.forEach(blocks, function(b) {
- var li = d.createElement('li');
- li.appendChild(d.createTextNode(b.score));
- var ul2 = d.createElement('ul');
- A.forEach(b.leaves, function(v){
- v = v.node;
- var s = v.tagName || DOM.text(v) || Util.dump(v);
- s = s.replace(/\s+/g, '');
- var li2 = d.createElement('li');
- s = v.nodeName + ': ' + (s.length ? s : '<empty>');
- li2.appendChild(d.createTextNode(s));
- ul2.appendChild(li2);
- });
- li.appendChild(ul2);
- ul.appendChild(li);
- });
- div.appendChild(ul);
- return div;
- }
- };
-
- l.extractContentTest.doTest = function() {
- var e = l.extractContentTest.extractContent(document);
- var b = document.body;
- while (b.firstChild) {
- b.removeChild(b.firstChild);
- }
- b.appendChild(e);
- };
-
- if (l.extractContentTest.auto) {
- l.extractContentTest.doTest();
- }
-});
+ new Libs(url, null).loadEach(libs, testFunc);
+})();
View
652 sketch/suggest-tag.test.js
@@ -1,4 +1,187 @@
-(function(url, list, callback) {
+(function() {
+ var url = 'http://labs.orezdnu.org/js/extract-content/'; // for test; handed to `new Libs(url, null)` at the end of this function
+ var libs = [ // handed to Libs#loadEach together with testFunc; each entry is [ script file, global name or list of global names ]
+ [ 'lib.js', [ // NOTE(review): presumably the dotted names are what the loader checks for once the script is loaded -- confirm against Libs
+ 'ExtractContentJS.Lib.Util',
+ 'ExtractContentJS.Lib.A',
+ 'ExtractContentJS.Lib.DOM'
+ ] ],
+ [ 'extract-content.js', 'ExtractContentJS.LayeredExtractor' ],
+ [ 'scoring-words.js', 'ExtractContentJS.RelativeWords' ]
+ ];
+ var testFunc = function(l) {
+ // Shorthand aliases for the helper objects listed for lib.js in `libs`.
+ var Util = ExtractContentJS.Lib.Util;
+ var A = ExtractContentJS.Lib.A;
+ var DOM = ExtractContentJS.Lib.DOM;
+
+ // Make sure the SuggestTagTest namespace exists on `l`. It has to be
+ // assigned as a property of `l` (not declared as a local variable),
+ // because every later read and write goes through l.SuggestTagTest and
+ // would otherwise throw a TypeError when the caller did not predefine it.
+ if (typeof l.SuggestTagTest == 'undefined') {
+     l.SuggestTagTest = {};
+ }
+ // Optional debug flag supplied by the caller; undefined when not set.
+ var debug = l.SuggestTagTest.debug;
+
+ // Extract the content of document `d` with the layered extractor and
+ // score the candidate `tags` against the extraction result.
+ // Returns the result of RelativeWords#top, or null when `d` has no body
+ // or the extraction reports failure.
+ l.SuggestTagTest.suggestTags = function(d, tags) {
+     if (!d.body) {
+         return null;
+     }
+
+     // Only the 'Heuristics' handler is registered; the 'Description',
+     // 'Scraper' and 'GoogleAdsence' handlers are currently disabled.
+     var extractor = new ExtractContentJS.LayeredExtractor();
+     var heuristics = extractor.factory.getHandler('Heuristics');
+     extractor.addHandler(heuristics);
+
+     var extracted = extractor.extract(d);
+     if (!extracted.isSuccess) {
+         return null;
+     }
+
+     var scorer = new ExtractContentJS.RelativeWords();
+     var tfIdf = scorer.factory.getEngine('TfIdf');
+     scorer.addEngine(tfIdf);
+     /* FIXME: other engines */
+
+     return scorer.top(extracted, tags);
+ };
+
+ // Suggest tags for the current page and append the best ones to the end
+ // of the body as a bulleted list of "word (score)" items.
+ // Reads two optional settings from l.SuggestTagTest:
+ //   limit - maximum number of entries to show (falls back to 5)
+ //   data  - tag table to score against (falls back to the sample below)
+ // Does nothing when suggestTags yields no result.
+ l.SuggestTagTest.doTest = function() {
+     // Sample tag table used when the caller supplies no data.
+     var defaultTags = {
+         '2008': 1,
+         '2009': 5,
+         'algorithm': 5,
+         'anime': 2,
+         'art': 2,
+         'book': 9,
+         'browser': 1,
+         'color': 1,
+         'comic': 4,
+         'communication': 1,
+         'compiler': 2,
+         'conference': 1,
+         'continuation': 1,
+         'copyright': 2,
+         'coq': 2,
+         'cpan': 2,
+         'cpp': 20,
+         'css': 2,
+         'cv': 1,
+         'ddns': 4,
+         'debian': 10,
+         'debug': 6,
+         'design': 2,
+         'diy': 2,
+         'dom': 3,
+         'education': 7,
+         'emacs': 15,
+         'english': 4,
+         'firefox': 14,
+         'flash': 4,
+         'font': 10,
+         'gadget': 1,
+         'git': 4,
+         'gnuplot': 1,
+         'graph': 1,
+         'gui': 1,
+         'hardware': 3,
+         'haskell': 1,
+         'hatena': 23,
+         'haxe': 1,
+         'html': 1,
+         'ie': 1,
+         'illusion': 1,
+         'illustrator': 2,
+         'image': 4,
+         'javascript': 33,
+         'javascrpit': 1,
+         'keyboard': 1,
+         'kurobox': 10,
+         'language': 18,
+         'lecture': 1,
+         'library': 3,
+         'life': 14,
+         'linux': 25,
+         'lisp': 2,
+         'local': 1,
+         'logic': 3,
+         'mail': 2,
+         'math': 6,
+         'mobile': 1,
+         'monad': 1,
+         'music': 4,
+         'mywork': 7,
+         'neta': 66,
+         'network': 8,
+         'nlp': 3,
+         'ocaml': 3,
+         'paper': 6,
+         'pdf': 8,
+         'perl': 23,
+         'photo': 2,
+         'photoshop': 1,
+         'plugin': 1,
+         'postfix': 1,
+         'presentation': 6,
+         'programming': 22,
+         'proof': 2,
+         'puzzle': 1,
+         'reference': 15,
+         'research': 13,
+         'rfc': 3,
+         'ruby': 14,
+         'science': 3,
+         'security': 4,
+         'sed': 1,
+         'server': 3,
+         'sf': 2,
+         'shop': 3,
+         'skk': 4,
+         'smb': 2,
+         'sound': 2,
+         'ssh': 7,
+         'ssl': 1,
+         'standard': 3,
+         'stl': 1,
+         'template': 2,
+         'test': 3,
+         'tex': 7,
+         'thrift': 1,
+         'tips': 9,
+         'tutorial': 6,
+         'twist': 1,
+         'typeset': 1,
+         'ugomemo': 2,
+         'ui': 2,
+         'unicode': 4,
+         'vfx': 4,
+         'vim': 5,
+         'vimperator': 7,
+         'viper': 1,
+         'virus': 2,
+         'vpn': 1,
+         'web': 16,
+         'windows': 14,
+         'workshop': 4,
+         'xml': 3,
+         'zsh': 5
+     };
+     var limit = l.SuggestTagTest.limit || 5;
+     var tags = l.SuggestTagTest.data || defaultTags;
+
+     var suggested = l.SuggestTagTest.suggestTags(document, tags);
+     if (!suggested) {
+         return;
+     }
+
+     // Never list more than `limit` entries.
+     var count = suggested.length > limit ? limit : suggested.length;
+     var list = document.createElement('ul');
+     for (var idx = 0; idx < count; idx++) {
+         var item = document.createElement('li');
+         var entry = suggested[idx];
+         var label = entry.word + ' (' + entry.score + ')';
+         item.appendChild(document.createTextNode(label));
+         list.appendChild(item);
+     }
+     document.body.appendChild(list);
+ };
+
+ if (l.SuggestTagTest.auto) { // opt-in flag: run the test immediately, as part of this testFunc invocation
+ l.SuggestTagTest.doTest();
+ }
+ };
+
+ /* library loader */
+
var A = {
filter: Array.filter || function(self, fun/*, thisp*/) {
var argi = 2;
@@ -98,468 +281,5 @@
return self;
};
- new Libs(url, null).loadEach(list, callback);
-})('http://labs.orezdnu.org/js/', [
- [ 'extract-content.js', 'WWW.LayeredExtractor' ],
- [ 'scoring-words.js', 'Scoring.RelativeWords' ]
-], function(l) {
- var Util = {
- inherit: function(child,parent) {
- var obj = child || {};
- for (var prop in parent) {
- if (typeof obj[prop] == 'undefined') {
- obj[prop] = parent[prop];
- }
- }
- return obj;
- },
- countMatch: function(text, regex) {
- return text.split(regex).length - 1;
- // var n=0;
- // for (var i=0;;) {
- // i = text.search(regex);
- // if (i < 0) break;
- // n++;
- // text = text.substr(i+1);
- // }
- // return n;
- },
- dump: function(obj) {
- if (typeof obj == 'undefined') return 'undefined';
- if (typeof obj == 'string') return '"' + obj + '"';
- if (typeof obj != 'object') return ''+obj;
- if (obj === null) return 'null';
- if (obj instanceof Array) {
- return '['
- + obj.map(function(v){return 'obj'/*Util.dump(v)*/;}).join(',')
- + ']';
- } else {
- var arr = [];
- for (var prop in obj) {
- arr.push(prop + ':' + 'obj'/*Util.dump(obj[prop])*/);
- }
- return '{' + arr.join(',') + '}';
- }
- }
- };
-
- var A = {
- indexOf: Array.indexOf || function(self, elt/*, from*/) {
- var argi = 2;
- var len = self.length;
- var from = Number(arguments[argi++]) || 0;
- from = (from < 0) ? Math.ceil(from) : Math.floor(from);
- if (from < 0) from += len;
- for (; from < len; from++) {
- if (from in self && self[from] === elt) return from;
- }
- return -1;
- },
- filter: Array.filter || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != "function") {
- throw new TypeError('A.filter: not a function');
- }
- var rv = new Array();
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self) {
- var val = self[i]; // in case fun mutates this
- if (fun.call(thisp, val, i, self)) rv.push(val);
- }
- }
- return rv;
- },
- forEach: Array.forEach || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.forEach: not a function');
- }
- var thisp = arguments[argi++];
- for (var i=0; i < len; i++) {
- if (i in self) fun.call(thisp, self[i], i, self);
- }
- },
- every: Array.every || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.every: not a function');
- }
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self &&
- !fun.call(thisp, self[i], i, self)) {
- return false;
- }
- }
- return true;
- },
- map: Array.map || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw new TypeError('A.map: not a function');
- }
- var rv = new Array(len);
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self) {
- rv[i] = fun.call(thisp, self[i], i, self);
- }
- }
- return rv;
- },
- some: Array.some || function(self, fun/*, thisp*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != "function") {
- throw new TypeError('A.some: not a function');
- }
- var thisp = arguments[argi++];
- for (var i = 0; i < len; i++) {
- if (i in self &&
- fun.call(thisp, self[i], i, self)) {
- return true;
- }
- }
- return false;
- },
- reduce: Array.reduce || function(self, fun/*, initial*/) {
- var argi = 2;
- var len = self.length;
- if (typeof fun != 'function') {
- throw TypeError('A.reduce: not a function ');
- }
- var i = 0;
- var prev;
- if (arguments.length > argi) {
- var rv = arguments[argi++];
- } else {
- do {
- if (i in self) {
- rv = self[i++];
- break;
- }
- if (++i >= len) {
- throw new TypeError('A.reduce: empty array');
- }
- } while (true);
- }
- for (; i < len; i++) {
- if (i in self) rv = fun.call(null, rv, self[i], i, self);
- }
- return rv;
- },
- zip: function(self) {
- if (self[0] instanceof Array) {
- var l = self[0].length;
- var len = self.length;
- var z = new Array(l);
- for (var i=0; i < l; i++) {
- z[i] = [];
- for (var j=0; j < len; j++) {
- z[i].push(self[j][i]);
- }
- }
- return z;
- }
- return [];
- },
- first: function(self) {
- return self ? self[0] : null;
- },
- last: function(self) {
- return self ? self[self.length-1] : null;
- },
- push: function(self, other) {
- return Array.prototype.push.apply(self, other);
- }
- };
-
- var DOM = {
- getElementStyle: function(elem, prop) {
- var style = elem.style ? elem.style[prop] : null;
- if (!style) {
- var dv = document.defaultView;
- if (dv && dv.getComputedStyle) {
- try {
- var styles = dv.getComputedStyle(elem, null);
- } catch(e) {
- return null;
- }
- prop = prop.replace(/([A-Z])/g, '-$1').toLowerCase();
- style = styles ? styles.getPropertyValue(prop) : null;
- } else if (elem.currentStyle) {
- style = elem.currentStyle[prop];
- }
- }
- return style;
- },
- text: function(node) {
- if (typeof node.textContent != 'undefined') {
- return node.textContent;
- } else if (node.nodeName == '#text') {
- return node.nodeValue;
- } else if (typeof node.innerText != 'undefined') {
- return node.innerText; // IE
- }
- },
- ancestors: function(e) {
- var body = e.ownerDocument.body;
- var r = [];
- var it = e;
- while (it != body) {
- r.push(it);
- it = it.parentNode;
- }
- r.push(body);
- return r; // [e .. document.body]
- },
- commonAncestor: function(e1, e2) {
- var a1 = DOM.ancestors(e1).reverse();
- var a2 = DOM.ancestors(e2).reverse();
- var r = null;
- for (var i=0; a1[i] && a2[i] && a1[i] == a2[i]; i++) {
- r = a1[i];
- }
- return r;
- },
- countMatchTagAttr: function(node, tag, attr, regexs) {
- var test = function(v){ return v.test(node[attr]); };
- if ((node.tagName||'').toLowerCase()==tag && A.some(regexs,test)) {
- return 1;
- }
- var n=0;
- var children = node.childNodes;
- for (var i=0, len=children.length; i < len; i++) {
- n += DOM.countMatchTagAttr(children[i], tag, attr, regexs);
- }
- return n;
- },
- matchTag: function(node, pat) {
- return A.some(pat, function(v){
- if (typeof v == 'string') {
- return v == (node.tagName||'').toLowerCase();
- } else if (v instanceof Array) {
- return v[0] == (node.tagName||'').toLowerCase()
- && DOM.matchAttr(node, v[1]);
- } else {
- return false;
- }
- });
- },
- matchAttr: function(node, pat) {
- var test = function(pat, val) {
- if (typeof pat == 'string') {
- return pat == val;
- } else if (pat instanceof RegExp) {
- return pat.test(val);
- } else if (pat instanceof Array) {
- return A.some(pat,function(v){return test(v,val);});
- } else if (pat instanceof Object) {
- for (var prop in pat) {
- var n = node[prop];
- if (n && DOM.matchAttr(n, pat[prop])) {
- return true;
- }
- }
- }
- return false;
- };
- for (var prop in pat) {
- var attr = node[prop];
- var ar = pat[prop];
- if (attr) {
- return test(ar, attr);
- }
- }
- return false;
- },
- matchStyle: function(node, pat) {
- var test = function(pat, val) {
- if (typeof pat == 'string') {
- return pat == val;
- } else if (pat instanceof RegExp) {
- return pat.test(val);
- } else if (pat instanceof Array) {
- return A.some(pat,function(v){return test(v,val);});
- }
- return false;
- };
- for (var prop in pat) {
- if (test(pat[prop], DOM.getElementStyle(node, prop))) {
- return true;
- }
- }
- return false;
- }
- };
-
- if (typeof l.suggestTagTest == 'undefined') {
- var suggestTagTest = {};
- }
- var debug = l.suggestTagTest.debug;
-
- l.suggestTagTest.suggestTags = function(d, tags) {
- if (!d.body) return null;
-
- var ex = new WWW.LayeredExtractor;
-// ex.addHandler( ex.factory.getHandler('Description') );
-// ex.addHandler( ex.factory.getHandler('Scraper'));
-// ex.addHandler( ex.factory.getHandler('GoogleAdsence') );
- ex.addHandler( ex.factory.getHandler('Heuristics') );
- var res = ex.extract(d);
-
- if (!res.isSuccess) return null;
-
- var sc = new Scoring.RelativeWords();
- sc.addEngine( sc.factory.getEngine('TfIdf') );
- /* FIXME: other engines */
-
- return sc.top(res, tags);
- };
-
- l.suggestTagTest.doTest = function() {
- var limit = l.suggestTagTest.limit || 5;
- var tags = l.suggestTagTest.data || {
- '2008': 1,
- '2009': 5,
- 'algorithm': 5,
- 'anime': 2,
- 'art': 2,
- 'book': 9,
- 'browser': 1,
- 'color': 1,
- 'comic': 4,
- 'communication': 1,
- 'compiler': 2,
- 'conference': 1,
- 'continuation': 1,
- 'copyright': 2,
- 'coq': 2,
- 'cpan': 2,
- 'cpp': 20,
- 'css': 2,
- 'cv': 1,
- 'ddns': 4,
- 'debian': 10,
- 'debug': 6,
- 'design': 2,
- 'diy': 2,
- 'dom': 3,
- 'education': 7,
- 'emacs': 15,
- 'english': 4,
- 'firefox': 14,
- 'flash': 4,
- 'font': 10,
- 'gadget': 1,
- 'git': 4,
- 'gnuplot': 1,
- 'graph': 1,
- 'gui': 1,
- 'hardware': 3,
- 'haskell': 1,
- 'hatena': 23,
- 'haxe': 1,
- 'html': 1,
- 'ie': 1,
- 'illusion': 1,
- 'illustrator': 2,
- 'image': 4,
- 'javascript': 33,
- 'javascrpit': 1,
- 'keyboard': 1,
- 'kurobox': 10,
- 'language': 18,
- 'lecture': 1,
- 'library': 3,
- 'life': 14,
- 'linux': 25,
- 'lisp': 2,
- 'local': 1,
- 'logic': 3,
- 'mail': 2,
- 'math': 6,
- 'mobile': 1,
- 'monad': 1,
- 'music': 4,
- 'mywork': 7,
- 'neta': 66,
- 'network': 8,
- 'nlp': 3,
- 'ocaml': 3,
- 'paper': 6,
- 'pdf': 8,
- 'perl': 23,
- 'photo': 2,
- 'photoshop': 1,
- 'plugin': 1,
- 'postfix': 1,
- 'presentation': 6,
- 'programming': 22,
- 'proof': 2,
- 'puzzle': 1,
- 'reference': 15,
- 'research': 13,
- 'rfc': 3,
- 'ruby': 14,
- 'science': 3,
- 'security': 4,
- 'sed': 1,
- 'server': 3,
- 'sf': 2,
- 'shop': 3,
- 'skk': 4,
- 'smb': 2,
- 'sound': 2,
- 'ssh': 7,
- 'ssl': 1,
- 'standard': 3,
- 'stl': 1,
- 'template': 2,
- 'test': 3,
- 'tex': 7,
- 'thrift': 1,
- 'tips': 9,
- 'tutorial': 6,
- 'twist': 1,
- 'typeset': 1,
- 'ugomemo': 2,
- 'ui': 2,
- 'unicode': 4,
- 'vfx': 4,
- 'vim': 5,
- 'vimperator': 7,
- 'viper': 1,
- 'virus': 2,
- 'vpn': 1,
- 'web': 16,
- 'windows': 14,
- 'workshop': 4,
- 'xml': 3,
- 'zsh': 5
- };
- var suggested = l.suggestTagTest.suggestTags(document, tags);
- if (!suggested) return;
-
- var d = document;
- var ul = d.createElement('ul');
- var len = suggested.length;
- if (len > limit) len = limit;
- for (var i=0; i<len; i++) {
- var li = d.createElement('li');
- var t = suggested[i];
- li.appendChild(d.createTextNode(t.word + ' (' + t.score + ')'));
- ul.appendChild(li);
- }
- d.body.appendChild(ul);
- };
-
- if (l.suggestTagTest.auto) {
- l.suggestTagTest.doTest();
- }
-});
+ new Libs(url, null).loadEach(libs, testFunc);
+})();

0 comments on commit 6ad4532

Please sign in to comment.
Something went wrong with that request. Please try again.