Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP
Browse files

1. use yui instead of stale yui3 npm package

2. added copyright
3. bump package version
  • Loading branch information...
commit 3cd3d102963b7cde5cb9d0aecb3dac7e3b53b0ec 1 parent e5cd46f
huang47 authored
View
6 LICENSE
@@ -0,0 +1,6 @@
+Copyrights for code authored by Yahoo! Inc. is licensed under the following terms:
+MIT License
+Copyright (c) 2012 Yahoo! Inc. All Rights Reserved.
+Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
View
5 README.md
@@ -8,9 +8,8 @@ npm install css-crawler
* host : web site url to fetch
* rule : css selector
-* callback : name of callback function (optional, default
-is callback)
-* htmlFormat : innerHTML or outerHTML (optional, default is outerHTML)
+* callback : (optional) name of callback function (default: callback)
+* htmlFormat : (optional) innerHTML or outerHTML (default: outerHTML)
## APPENDIX
### [github][1]
View
74 css-crawler.js
@@ -1,15 +1,35 @@
-var util = require('util'),
- YUI = require('yui3').YUI,
- EventEmitter = require('events').EventEmitter;
+var util = require('util')
+ , YUI = require('yui').YUI
+ , jsdom = require('jsdom')
+ , EventEmitter = require('events').EventEmitter;
module.exports = new EventEmitter();
+//Turn off all the things we don't want.
+jsdom.defaultDocumentFeatures = {
+ //Don't bring in outside resources
+ FetchExternalResources : false,
+ //Don't process them
+ ProcessExternalResources : false,
+ //Don't expose Mutation events (for performance)
+ MutationEvents : false,
+ //Do not use their implementation of QSA
+ QuerySelector : false
+};
+
+var dom = jsdom.defaultLevel;
+//Hack in focus and blur methods so they don't fail when a YUI widget calls them
+dom.Element.prototype.blur = function() {};
+dom.Element.prototype.focus = function() {};
+
+//Create the document and window
module.exports.fetch = function (config) {
- var host = config.host,
- rule = config.rule,
- callback = config.callback,
- format = config.format,
- obj = {};
+ var host = config.host
+ , rule = config.rule
+ , callback = config.callback || 'callback'
+ , format = config.format || 'outerHTML'
+ , obj = {}
+ , document;
// added http prefix
if (-1 === host.indexOf('http://')) {
@@ -17,22 +37,32 @@ module.exports.fetch = function (config) {
}
if (host && rule) {
- YUI({ debug: false }).use('node', 'io', function (Y) {
- Y.fetch(host, function () {
- var results = Y.all(rule),
- items = [], item;
-
- results.each(function (n) {
- item = n.get(format);
- items.push(item);
+ jsdom.env({ html: host, done: function (errors, win) {
+ doc = win.document;
+
+ YUI({
+ win: win,
+ doc: win.document
+ }).use('node', 'io', function (Y) {
+ Y.on('io:complete', function (id, o, c) {
+ var results = Y.all(rule),
+ items = [], item;
+
+ results.each(function (n) {
+ item = n.get(format);
+ items.push(item);
+ });
+
+ obj.rule = rule;
+ obj.host = host;
+ obj.callback = callback;
+ obj.results = items;
+ module.exports.emit('data', obj);
});
- obj.rule = rule;
- obj.host = host;
- obj.callback = callback;
- obj.results = items;
- module.exports.emit('data', obj);
+ Y.io(host);
});
- });
+ }});
+
}
}
View
17 demo/DEMO.md
@@ -0,0 +1,17 @@
+# query web via css selector
+
+## idea
+This is inspired by how YUI3 and NodeJS deal with the DOM.
+It is also the essence of YQL.
+However, CSS selectors are friendlier for web development than XPath.
+
+## run
+./server (run on default port 10633)
+
+./server 3333 (run on specified port 3333)
+
+## demo
+
+1. /demo/profile: twitter/github
+1. /demo/index: instant search for multiple Yahoo! properties
+1. /demo/app: instant search for Yahoo! Apps Search
View
138 demo/app.html
@@ -0,0 +1,138 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Yahoo Apps Instant</title>
+ <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssreset/reset.css" type="text/css">
+ <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssfonts/fonts.css" type="text/css">
+ <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssgrids/grids.css" type="text/css">
+ <script src="http://yui.yahooapis.com/3.3.0/build/yui/yui-min.js"></script>
+<style>
+    html {
+        /* Property and value must be separated by ':'; the former
+           "background-color; #ffffff;" was dropped as an invalid declaration. */
+        background-color: #ffffff;
+        background-image: url("");
+        background-repeat: repeat-x;
+        overflow-y: scroll;
+    }
+ body {
+ margin: 20px auto;
+ width: 960px;
+ }
+ body label, body input {
+ font-size: 24px;
+ width: 600px;
+ margin-bottom: 20px;
+ }
+
+ #main ul {
+ list-style: none;
+ }
+
+ #main ul li {
+ margin-bottom: 15px;
+ }
+
+ .app-res {
+ width: 600px;
+ margin-left: 80px;
+ background: url("http://a.l.yimg.com/a/lib/s9/app-repeat-bg-20110615.png") repeat-x scroll 0 -644px transparent;
+ }
+
+ .app-res .left {
+ float: left;
+ margin-right: 20px;
+ }
+
+ .app-res .left img {
+ border-radius: 10px;
+ margin: 5px 0 0 5px;
+ }
+
+ .app-res .center {
+ width: 200px;
+ overflow: hidden;
+ margin-left: 80px;
+ }
+
+ .app-res .right {
+ overflow: hidden;
+ margin-left: 80px;
+ }
+
+ .stars-lg span {
+ background: url("http://a.l.yimg.com/a/lib/s9/app-srp-bg-20110610.png") no-repeat scroll 0 -95px transparent;
+ font-size: 0;
+ display: inline-block;
+ height: 14px;
+ width: 14px;
+ }
+
+ .stars-lg span.empty {
+ background-position: -30px -95px;
+ }
+
+ .app-res .getitnow .small_get_btn {
+ background: url("http://a.l.yimg.com/a/lib/s9/app-srp-bg-20110610.png") repeat scroll 0 -63px transparent;
+ display: block;
+ font-size: 15px;
+ font-weight: bold;
+ height: 26px;
+ line-height: 20px;
+ overflow: hidden;
+ text-align: center;
+ text-indent: 0;
+ white-space: nowrap;
+ width: 91px;
+ }
+</style>
+
+</head>
+<body>
+ <label for="query">query : </label>
+ <input type="text" name="query" id="query" value="plants" />
+ <div id="main">
+ <ul class="result-list">
+ </ul>
+ </div>
+
+ <script type="text/javascript">
+YUI().use('node', 'node-event-simulate', function (Y) {
+ var d = document,
+ head = document.getElementsByTagName('head')[0],
+ query = Y.one('#query'),
+ api = '/api',
+ host = 'http://apps.search.yahoo.com/search?p=',
+ rule = '#main .app-res',
+ callback = 'inst',
+ htmlFormat = 'outerHTML',
+ script;
+
+ window.inst = function (obj) {
+ var images = d.images,
+ html = '', r;
+ Y.each(obj.results, function (v, k) {
+ html += '<li>' + v + '</li>';
+ });
+ Y.one('#main ul').set('innerHTML', html);
+ Y.each(images, function (image) {
+ image.setAttribute('src', image.getAttribute('data-src'));
+ });
+ };
+
+ query.on('keyup', function () {
+ script = d.createElement('script');
+ script.type="text/javascript";
+ script.src = api + '?' + 'host=' + host + query.get('value')
+ + '&rule=' + encodeURIComponent(rule)
+ + '&callback=' + callback
+ + '&htmlFormat=' + htmlFormat;
+ head.appendChild(script);
+ });
+
+ if (query.get('value')) {
+ query.simulate('keyup');
+ }
+});
+ </script>
+</body>
+</html>
View
182 demo/index.html
@@ -0,0 +1,182 @@
+<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
+<html>
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>SRP Grids</title>
+ <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssreset/reset.css" type="text/css">
+ <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssfonts/fonts.css" type="text/css">
+ <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssgrids/grids.css" type="text/css">
+ <script src="http://yui.yahooapis.com/3.3.0/build/yui/yui-min.js"></script>
+<style>
+body {
+ margin: auto;
+ width:960px;
+}
+
+#hd, #ft {
+ height: 50px;
+ margin-top: 20px;
+}
+
+#hd label, #hd input {
+ font-size: 24px;
+}
+
+#hd input { width: 600px; }
+
+h2 {
+ color: #fff;
+ font-weight: bold;
+}
+
+.yui3-u-1-3 {
+ height: 240px;
+}
+
+#shopping {
+ background-color: #f7949c;
+}
+
+#finance {
+ background-color: #45e6e4;
+}
+
+#dir {
+ background-color: #8caf8c;
+}
+
+#video {
+ background-color: #c00;
+}
+
+#images {
+ background-color: #4c6e9b;
+}
+
+#apps {
+ background-color: #ce8b43;
+}
+
+</style>
+
+</head>
+<body>
+ <div id="hd">
+ <label for="query">query : </label>
+ <input type="text" id="query" name="query" value="plants"/>
+ </div>
+
+ <div id="bd">
+ <div class="yui3-g" id="boring">
+ <div class="yui3-u-1-3" id="shopping">
+ <div class="content"><h2>Shopping</h2></div>
+ </div>
+
+ <div class="yui3-u-1-3" id="finance">
+ <div class="content"><h2>Finance</h2></div>
+ </div>
+
+ <div class="yui3-u-1-3" id="dir">
+ <div class="content"><h2>Directory</h2></div>
+ </div>
+ </div>
+ <div class="yui3-g" id="entertainment">
+ <div class="yui3-u-1-3" id="video">
+ <div class="content"><h2>Video</h2></div>
+ </div>
+
+ <div class="yui3-u-1-3" id="images">
+ <div class="content"><h2>Image</h2></div>
+ </div>
+
+ <div class="yui3-u-1-3" id="apps">
+ <div class="content"><h2>Apps</h2></div>
+ </div>
+ </div>
+ </div>
+
+ <div id="ft">
+ <h3>Have a nice day!</h3>
+ </div>
+
+ <script type="text/javascript">
+YUI().use('node', 'node-event-simulate', function (Y) {
+ window.updateVertical = function (obj) {
+ var html = '',
+ url = obj.host.substring(7),
+ vert = '#' + url.substring(0, url.indexOf('.')),
+ ul = Y.one(vert + ' ul');
+
+ if (!ul) {
+ ul = Y.Node.create('<ul></ul>');
+ Y.one(vert).appendChild(ul);
+ }
+
+ Y.Array.each(obj.results, function (item) {
+ html += '<li>' + item + '<li>';
+ });
+ ul.set('innerHTML', html);
+ };
+
+ var d = document,
+ head = d.getElementsByTagName('head')[0],
+ api = '/api',
+ q = Y.one('#query'),
+ script, handleReq;
+
+ handleReq = function (host, rule, callback, htmlFormat) {
+ callback = callback || 'updateVertical';
+ htmlFormat = htmlFormat || 'outerHTML';
+ var obj = {
+ host: host,
+ rule: encodeURIComponent(rule),
+ callback: callback,
+ htmlFormat: htmlFormat
+ };
+
+ script = d.createElement('script'),
+ script.type = "text/javascript";
+ script.src = (function () {
+ var params = [];
+ Y.each(obj, function (v, k) {
+ params.push(k + '=' + v);
+ });
+ return api + '?' + params.join('&');
+ })();
+ head.appendChild(script);
+ };
+
+ q.on('keyup', function (e) {
+ var value = q.get('value'),
+ query = encodeURIComponent(value.replace(/^\s+|\s+$/g, ''));
+
+ if (!query) {
+ return ;
+ }
+
+ // shopping
+ handleReq('http://shopping.yahoo.com/search?p=' + query, '#bd .hproducts .summary h2 a');
+
+ // apps
+ handleReq('http://apps.search.yahoo.com/search?p=' + query, '#main .app-res h3 a');
+
+ // finance
+ handleReq('http://finance.search.yahoo.com/search?p=' + query, '#web .res h3 a');
+
+ // directory
+ handleReq('http://dir.search.yahoo.com/search?p=' + query, '#dir .result_mix h3 a');
+
+ // video
+ handleReq('http://video.search.yahoo.com/video?p=' + query, '#buzz .pane li a span');
+
+ // images
+ handleReq('http://images.search.yahoo.com/images?p=' + query, '#car-content li .info a');
+ });
+
+ if (q.get('value')) {
+ q.simulate('keyup');
+ }
+});
+ </script>
+</body>
+</html>
View
102 demo/profile.html
@@ -0,0 +1,102 @@
+<!DOCTYPE html>
+<html>
+<head>
+ <meta http-equiv="content-type" content="text/html; charset=utf-8">
+ <title>Huang47's profile</title>
+ <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssreset/reset.css" type="text/css">
+ <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssfonts/fonts.css" type="text/css">
+ <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssgrids/grids.css" type="text/css">
+ <link rel="stylesheet" href="http://a1.twimg.com/twitter-mobile/d8fe8d40bf03a3c3029bafcbbe110d9340f93f12/assets/base.css" type="text/css">
+ <script src="http://yui.yahooapis.com/3.3.0/build/yui/yui-min.js"></script>
+ <style>
+ .column { width: 600px; }
+ #twitter, #github { float: left; }
+ #twitter {
+ background: url("") repeat-x scroll 0 0 #C0DEED;
+ }
+ #tweets-list { width: 520px; margin: 0 auto; }
+ .list-tweet .status {
+ display: block;
+ margin-top: 0.2em;
+ }
+ .repo label { font-size: 1.2em; }
+ .repo .attribute { font-size: 1.5em; }
+ .repo {
+ padding: 1em;
+ border-radius: 10px;
+ margin-bottom: 1em;
+ background-image: url("");
+ background-repeat: repeat-x;
+ }
+ </style>
+</head>
+<body>
+ <div id="twitter" class="column">
+ <ul id="tweets-list"></ul>
+ </div>
+ <div id="github" class="column">
+ <ul id="repos"></ul>
+ </div>
+
+ <script type="text/javascript">
+YUI().use('node', function (Y) {
+ var d = document,
+ head = document.getElementsByTagName('head')[0],
+ api = '/api',
+ callback = 'inst',
+ htmlFormat = 'outerHTML',
+ script, handleReq;
+
+ handleReq = function (host, rule) {
+ var obj = {
+ host: host,
+ rule: encodeURIComponent(rule),
+ callback: callback,
+ htmlFormat: htmlFormat
+ };
+
+ script = d.createElement('script'),
+ script.type = "text/javascript";
+ script.src = (function () {
+ var params = [];
+ Y.each(obj, function (v, k) {
+ params.push(k + '=' + v);
+ });
+ return api + '?' + params.join('&');
+ })();
+ head.appendChild(script);
+ };
+
+ window.inst = function (obj) {
+ var html = '';
+ Y.each(obj.results, function (v, k) {
+ html += '<li>' + v + '</li>';
+ });
+ Y.one('#tweets-list').set('innerHTML', html);
+ };
+
+ handleReq('http://mobile.twitter.com/huang47', '#tweets-list .list-tweet');
+
+ window.github = function (obj) {
+ var repos = obj.repositories,
+ html = '';
+
+ Y.Array.each(repos, function (repo) {
+ html += '<li class="repo">';
+ Y.each(repo, function (v, k) {
+ html += '<label for="' + k + '">' + k + '</label>' +
+ '<div name="' + k + '" class="attribute">' + v + '</div>';
+ });
+ html += '</li>';
+ });
+ Y.one('#repos').set('innerHTML', html);
+ }
+
+ script = d.createElement('script'),
+ script.type = "text/javascript";
+ script.src = 'http://github.com/api/v2/json/repos/show/huang47?sortBy=pushed_at&callback=github';
+ head.appendChild(script);
+});
+ </script>
+</body>
+</html>
View
53 demo/server.js
@@ -0,0 +1,53 @@
+#!/usr/bin/env node
+var app = require('express').createServer()
+ , YUI = require('yui').YUI
+ , cc = require('../css-crawler.js')
+ , url = require('url')
+ , querystring = require('querystring')
+ , fs = require('fs')
+ , md = require('markdown')
+ , PORT = process.argv[2] || 10633;
+
+app.get('/api', function (req, res) {
+ var query = querystring.parse(url.parse(req.url).query)
+ , params = query
+ , host = query.host
+ , rule = query.rule
+ , htmlFormat = query.htmlFormat || 'outerHTML'
+ , callback = query.callback || 'callback';
+
+ cc.fetch({
+ host: host,
+ rule: rule,
+ format: htmlFormat,
+ callback: callback
+ });
+
+ cc.on('data', function (data) {
+// res.header('Content-Type', 'application/json');
+ res.send(callback + '(' + JSON.stringify(data) + ')');
+ });
+});
+
+app.get('/demo/:page', function (req, res) {
+ fs.readFile(req.params.page + '.html', function (err, data) {
+ if (err) {
+ }
+ res.contentType('text/html');
+ res.send(data);
+ });
+});
+
+app.get('/readme', function (req, res) {
+ fs.readFile('./DEMO.md', function (err, data) {
+ var output = '';
+ if (err) {
+ }
+ res.contentType('text/html');
+ output = md.markdown.toHTML(data.toString());
+ res.send(output);
+ });
+});
+
+app.listen(PORT);
+console.log('server is running at port ' + PORT);
View
5 package.json
@@ -2,7 +2,7 @@
"author": "huang47 <huge.huang@gmail.com> (huang47.blogspot.com)",
"name": "css-crawler",
"description": "Crawl web via css selector",
- "version": "0.1.9",
+ "version": "0.3.1",
"repository": {
"type": "git",
"url": "git://github.com/huang47/css-crawler.git"
@@ -11,8 +11,7 @@
"node": "*"
},
"dependencies": {
- "yui3": "0.7.11",
- "express": "2.5.1"
+ "yui": "*"
},
"devDependencies": {},
"main": "css-crawler"
View
6 run
@@ -1,12 +1,10 @@
#!/usr/bin/env node
-var c = require('css-crawler');
+var c = require('./css-crawler.js');
c.fetch({
host: 'http://news.ycombinator.com',
- rule: 'table td.title a',
- callback: 'callback',
- format: 'innerHTML'
+ rule: 'table td.title a'
});
c.on('data', function (obj) {
Please sign in to comment.
Something went wrong with that request. Please try again.