Permalink
Browse files

misc

  • Loading branch information...
1 parent 967df0c commit ed2c36405a1aeab50360f1520683cb1f2dcac588 @arrix arrix committed Nov 23, 2010
Showing with 51 additions and 6 deletions.
  1. +4 −0 .gitignore
  2. +11 −0 notes.txt
  3. +2 −0 package.sh
  4. +1 −0 test.txt
  5. +9 −6 test/grab-pages.rb
  6. +24 −0 test/jsdom-bug.js
View
@@ -1 +1,5 @@
.DS_Store
+log/*.log
+dist/*
+*.tmproj
+
View
@@ -26,6 +26,11 @@ readability.getInnerText is very frequently used function. My optimization for i
# cleanStyles
cleanStyles is recursive; it accounts for most of the running time of prepArticle
+
+# security
+arbitrary js
+frames
+
# performance
grep TOTAL clean.log|cut -d ' ' -f5|sort -n
@@ -39,6 +44,12 @@ def hist(array)
end
+
+def avg(s, regex)
+ a = s.scan(regex).flatten.map(&:to_f)
+ a.reduce{|x,y| x+y}/a.size
+end
+
# sum profiler output
s = <<EOT
19 Nov 12:56:08 - 0.233 seconds [killBreaks]
View
@@ -0,0 +1,2 @@
+NAME=node-readability
+tar -zcf ./dist/readability.tgz -C .. --exclude=".*" --exclude="test*" $NAME/lib $NAME/LICENSE.txt $NAME/README.md $NAME/package.json
View
@@ -10,6 +10,7 @@ problems:
slow
http://127.0.0.1:3000/?url=http://www.gazeta.ru/news/lastnews/
http://127.0.0.1:3000/?url=http://www.sqlite.org/fts3.html
+http://127.0.0.1:3000/?url=http://news.google.com.hk/nwshp?hl=zh-tw&tab=in
returned html cannot be parsed by browser
http://blog.zacharyvoase.com/2010/11/11/sockets-and-nodes-i/
View
@@ -41,12 +41,15 @@ def parse_rss(feed)
def run
dir = File.expand_path('../pages', __FILE__)
FileUtils.mkdir(dir) unless File.exists? dir
-
- parse_rss(fetch_digg_feed) do |url, title|
- filename = title.gsub(/\W/, '_') + '.html'
- filepath = File.join(dir, filename)
- puts "fetching #{url} as #{filepath}"
- puts `curl #{url} > #{filepath} &`
+
+ [fetch_digg_feed, fetch_hackernews_feed, fetch_delicious_feed].each do |feed|
+ parse_rss(feed) do |url, title|
+ filename = title.gsub(/\W/, '_') + '.html'
+ filepath = File.join(dir, filename)
+ puts "fetching #{url} as #{filepath}"
+ puts `curl --connect-timeout=5 #{url} > #{filepath} &`
+ sleep 1
+ end
end
end
View
@@ -17,3 +17,27 @@ node.parentNode.removeChild(node);
console.log(''+all[i]); //still P#p1. the live NodeList wasn't updated properly
all.length; //trigger a refresh. the length getter calls update()
console.log(''+all[i]); //P#p2 OK
+
+
+// innerHTML = '' doesn't remove all children
+// https://github.com/tmpvar/jsdom/issues/#issue/80
+(function() {
+ var jsdom = require('jsdom');
+ var html = '<html><body><p id="p1"></p><p id="p2"></p></body></html>';
+ var doc = jsdom.jsdom(html);
+ var win = doc.createWindow();
+ var b = doc.body;
+ b.innerHTML = '';
+ console.log(b.innerHTML); //<p id="p2"></p>
+
+ var arr = [0, 1, 2, 3, 4, 5];
+ arr.forEach(function(v, i) {
+ console.log('[', i, '] ==', v);
+ arr.splice(i, 1);
+ });
+ // output
+ // [ 0 ] == 0
+ // [ 1 ] == 2
+ // [ 2 ] == 4
+
+})();

0 comments on commit ed2c364

Please sign in to comment.