Skip to content
This repository
Browse code

1. use yui instead of stale yui3 npm package

2. added copyright
3. bump package version
  • Loading branch information...
commit 3cd3d102963b7cde5cb9d0aecb3dac7e3b53b0ec 1 parent e5cd46f
Huge authored
6 LICENSE
... ... @@ -0,0 +1,6 @@
  1 +Copyrights for code authored by Yahoo! Inc. is licensed under the following terms:
  2 +MIT License
  3 +Copyright (c) 2012 Yahoo! Inc. All Rights Reserved.
  4 +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:
  5 +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.
  6 +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
5 README.md
Source Rendered
@@ -8,9 +8,8 @@ npm install css-crawler
8 8
9 9 * host : web site url to fetch
10 10 * rule : css selector
11   -* callback : name of callback function (optional, default
12   -is callback)
13   -* htmlFormat : innerHTML or outerHTML (optional, default is outerHTML)
  11 +* callback : (optional) name of callback function (default: callback)
  12 +* htmlFormat : (optional) innerHTML or outerHTML (default: outerHTML)
14 13
15 14 ## APPENDIX
16 15 ### [github][1]
74 css-crawler.js
... ... @@ -1,15 +1,35 @@
1   -var util = require('util'),
2   - YUI = require('yui3').YUI,
3   - EventEmitter = require('events').EventEmitter;
  1 +var util = require('util')
  2 + , YUI = require('yui').YUI
  3 + , jsdom = require('jsdom')
  4 + , EventEmitter = require('events').EventEmitter;
4 5
5 6 module.exports = new EventEmitter();
6 7
  8 +//Turn off all the things we don't want.
  9 +jsdom.defaultDocumentFeatures = {
  10 + //Don't bring in outside resources
  11 + FetchExternalResources : false,
  12 + //Don't process them
  13 + ProcessExternalResources : false,
  14 + //Don't expose Mutation events (for performance)
  15 + MutationEvents : false,
  16 + //Do not use their implementation of QSA
  17 + QuerySelector : false
  18 +};
  19 +
  20 +var dom = jsdom.defaultLevel;
  21 +//Hack in focus and blur methods so they don't fail when a YUI widget calls them
  22 +dom.Element.prototype.blur = function() {};
  23 +dom.Element.prototype.focus = function() {};
  24 +
  25 +//Create the document and window
7 26 module.exports.fetch = function (config) {
8   - var host = config.host,
9   - rule = config.rule,
10   - callback = config.callback,
11   - format = config.format,
12   - obj = {};
  27 + var host = config.host
  28 + , rule = config.rule
  29 + , callback = config.callback || 'callback'
  30 + , format = config.format || 'outerHTML'
  31 + , obj = {}
  32 + , document;
13 33
14 34 // added http prefix
15 35 if (-1 === host.indexOf('http://')) {
@@ -17,22 +37,32 @@ module.exports.fetch = function (config) {
17 37 }
18 38
19 39 if (host && rule) {
20   - YUI({ debug: false }).use('node', 'io', function (Y) {
21   - Y.fetch(host, function () {
22   - var results = Y.all(rule),
23   - items = [], item;
24   -
25   - results.each(function (n) {
26   - item = n.get(format);
27   - items.push(item);
  40 + jsdom.env({ html: host, done: function (errors, win) {
  41 + doc = win.document;
  42 +
  43 + YUI({
  44 + win: win,
  45 + doc: win.document
  46 + }).use('node', 'io', function (Y) {
  47 + Y.on('io:complete', function (id, o, c) {
  48 + var results = Y.all(rule),
  49 + items = [], item;
  50 +
  51 + results.each(function (n) {
  52 + item = n.get(format);
  53 + items.push(item);
  54 + });
  55 +
  56 + obj.rule = rule;
  57 + obj.host = host;
  58 + obj.callback = callback;
  59 + obj.results = items;
  60 + module.exports.emit('data', obj);
28 61 });
29 62
30   - obj.rule = rule;
31   - obj.host = host;
32   - obj.callback = callback;
33   - obj.results = items;
34   - module.exports.emit('data', obj);
  63 + Y.io(host);
35 64 });
36   - });
  65 + }});
  66 +
37 67 }
38 68 }
17 demo/DEMO.md
Source Rendered
... ... @@ -0,0 +1,17 @@
  1 +# query web via css selector
  2 +
  3 +## idea
  4 +This is inspired by how YUI3 and Node.js deal with the DOM.
  5 +It is the essence of YQL as well.
  6 +However, CSS selectors are friendlier than XPath for web development.
  7 +
  8 +## run
  9 +./server (run on default port 10633)
  10 +
  11 +./server 3333 (run on specified port 3333)
  12 +
  13 +## demo
  14 +
  15 +1. /demo/profile: twitter/github
  16 +1. /demo/index: instant search for multiple Yahoo! properties
  17 +1. /demo/app: instant search for Yahoo! Apps Search
138 demo/app.html
... ... @@ -0,0 +1,138 @@
  1 +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
  2 +<html>
  3 +<head>
  4 + <meta http-equiv="content-type" content="text/html; charset=utf-8">
  5 + <title>Yahoo Apps Instant</title>
  6 + <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssreset/reset.css" type="text/css">
  7 + <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssfonts/fonts.css" type="text/css">
  8 + <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssgrids/grids.css" type="text/css">
  9 + <script src="http://yui.yahooapis.com/3.3.0/build/yui/yui-min.js"></script>
  10 +<style>
  11 + html {
  12 + background-color; #ffffff;
  13 + background-image: url("");
  14 + background-repeat: repeat-x;
  15 + overflow-y: scroll;
  16 + }
  17 + body {
  18 + margin: 20px auto;
  19 + width: 960px;
  20 + }
  21 + body label, body input {
  22 + font-size: 24px;
  23 + width: 600px;
  24 + margin-bottom: 20px;
  25 + }
  26 +
  27 + #main ul {
  28 + list-style: none;
  29 + }
  30 +
  31 + #main ul li {
  32 + margin-bottom: 15px;
  33 + }
  34 +
  35 + .app-res {
  36 + width: 600px;
  37 + margin-left: 80px;
  38 + background: url("http://a.l.yimg.com/a/lib/s9/app-repeat-bg-20110615.png") repeat-x scroll 0 -644px transparent;
  39 + }
  40 +
  41 + .app-res .left {
  42 + float: left;
  43 + margin-right: 20px;
  44 + }
  45 +
  46 + .app-res .left img {
  47 + border-radius: 10px;
  48 + margin: 5px 0 0 5px;
  49 + }
  50 +
  51 + .app-res .center {
  52 + width: 200px;
  53 + overflow: hidden;
  54 + margin-left: 80px;
  55 + }
  56 +
  57 + .app-res .right {
  58 + overflow: hidden;
  59 + margin-left: 80px;
  60 + }
  61 +
  62 + .stars-lg span {
  63 + background: url("http://a.l.yimg.com/a/lib/s9/app-srp-bg-20110610.png") no-repeat scroll 0 -95px transparent;
  64 + font-size: 0;
  65 + display: inline-block;
  66 + height: 14px;
  67 + width: 14px;
  68 + }
  69 +
  70 + .stars-lg span.empty {
  71 + background-position: -30px -95px;
  72 + }
  73 +
  74 + .app-res .getitnow .small_get_btn {
  75 + background: url("http://a.l.yimg.com/a/lib/s9/app-srp-bg-20110610.png") repeat scroll 0 -63px transparent;
  76 + display: block;
  77 + font-size: 15px;
  78 + font-weight: bold;
  79 + height: 26px;
  80 + line-height: 20px;
  81 + overflow: hidden;
  82 + text-align: center;
  83 + text-indent: 0;
  84 + white-space: nowrap;
  85 + width: 91px;
  86 + }
  87 +</style>
  88 +
  89 +</head>
  90 +<body>
  91 + <label for="query">query : </label>
  92 + <input type="text" name="query" id="query" value="plants" />
  93 + <div id="main">
  94 + <ul class="result-list">
  95 + </ul>
  96 + </div>
  97 +
  98 + <script type="text/javascript">
  99 +YUI().use('node', 'node-event-simulate', function (Y) {
  100 + var d = document,
  101 + head = document.getElementsByTagName('head')[0],
  102 + query = Y.one('#query'),
  103 + api = '/api',
  104 + host = 'http://apps.search.yahoo.com/search?p=',
  105 + rule = '#main .app-res',
  106 + callback = 'inst',
  107 + htmlFormat = 'outerHTML',
  108 + script;
  109 +
  110 + window.inst = function (obj) {
  111 + var images = d.images,
  112 + html = '', r;
  113 + Y.each(obj.results, function (v, k) {
  114 + html += '<li>' + v + '</li>';
  115 + });
  116 + Y.one('#main ul').set('innerHTML', html);
  117 + Y.each(images, function (image) {
  118 + image.setAttribute('src', image.getAttribute('data-src'));
  119 + });
  120 + };
  121 +
  122 + query.on('keyup', function () {
  123 + script = d.createElement('script');
  124 + script.type="text/javascript";
  125 + script.src = api + '?' + 'host=' + host + query.get('value')
  126 + + '&rule=' + encodeURIComponent(rule)
  127 + + '&callback=' + callback
  128 + + '&htmlFormat=' + htmlFormat;
  129 + head.appendChild(script);
  130 + });
  131 +
  132 + if (query.get('value')) {
  133 + query.simulate('keyup');
  134 + }
  135 +});
  136 + </script>
  137 +</body>
  138 +</html>
182 demo/index.html
... ... @@ -0,0 +1,182 @@
  1 +<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN" "http://www.w3.org/TR/html4/strict.dtd">
  2 +<html>
  3 +<head>
  4 + <meta http-equiv="content-type" content="text/html; charset=utf-8">
  5 + <title>SRP Grids</title>
  6 + <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssreset/reset.css" type="text/css">
  7 + <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssfonts/fonts.css" type="text/css">
  8 + <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssgrids/grids.css" type="text/css">
  9 + <script src="http://yui.yahooapis.com/3.3.0/build/yui/yui-min.js"></script>
  10 +<style>
  11 +body {
  12 + margin: auto;
  13 + width:960px;
  14 +}
  15 +
  16 +#hd, #ft {
  17 + height: 50px;
  18 + margin-top: 20px;
  19 +}
  20 +
  21 +#hd label, #hd input {
  22 + font-size: 24px;
  23 +}
  24 +
  25 +#hd input { width: 600px; }
  26 +
  27 +h2 {
  28 + color: #fff;
  29 + font-weight: bold;
  30 +}
  31 +
  32 +.yui3-u-1-3 {
  33 + height: 240px;
  34 +}
  35 +
  36 +#shopping {
  37 + background-color: #f7949c;
  38 +}
  39 +
  40 +#finance {
  41 + background-color: #45e6e4;
  42 +}
  43 +
  44 +#dir {
  45 + background-color: #8caf8c;
  46 +}
  47 +
  48 +#video {
  49 + background-color: #c00;
  50 +}
  51 +
  52 +#images {
  53 + background-color: #4c6e9b;
  54 +}
  55 +
  56 +#apps {
  57 + background-color: #ce8b43;
  58 +}
  59 +
  60 +</style>
  61 +
  62 +</head>
  63 +<body>
  64 + <div id="hd">
  65 + <label for="query">query : </label>
  66 + <input type="text" id="query" name="query" value="plants"/>
  67 + </div>
  68 +
  69 + <div id="bd">
  70 + <div class="yui3-g" id="boring">
  71 + <div class="yui3-u-1-3" id="shopping">
  72 + <div class="content"><h2>Shopping</h2></div>
  73 + </div>
  74 +
  75 + <div class="yui3-u-1-3" id="finance">
  76 + <div class="content"><h2>Finance</h2></div>
  77 + </div>
  78 +
  79 + <div class="yui3-u-1-3" id="dir">
  80 + <div class="content"><h2>Directory</h2></div>
  81 + </div>
  82 + </div>
  83 + <div class="yui3-g" id="entertainment">
  84 + <div class="yui3-u-1-3" id="video">
  85 + <div class="content"><h2>Video</h2></div>
  86 + </div>
  87 +
  88 + <div class="yui3-u-1-3" id="images">
  89 + <div class="content"><h2>Image</h2></div>
  90 + </div>
  91 +
  92 + <div class="yui3-u-1-3" id="apps">
  93 + <div class="content"><h2>Apps</h2></div>
  94 + </div>
  95 + </div>
  96 + </div>
  97 +
  98 + <div id="ft">
  99 + <h3>Have a nice day!</h3>
  100 + </div>
  101 +
  102 + <script type="text/javascript">
  103 +YUI().use('node', 'node-event-simulate', function (Y) {
  104 + window.updateVertical = function (obj) {
  105 + var html = '',
  106 + url = obj.host.substring(7),
  107 + vert = '#' + url.substring(0, url.indexOf('.')),
  108 + ul = Y.one(vert + ' ul');
  109 +
  110 + if (!ul) {
  111 + ul = Y.Node.create('<ul></ul>');
  112 + Y.one(vert).appendChild(ul);
  113 + }
  114 +
  115 + Y.Array.each(obj.results, function (item) {
  116 + html += '<li>' + item + '<li>';
  117 + });
  118 + ul.set('innerHTML', html);
  119 + };
  120 +
  121 + var d = document,
  122 + head = d.getElementsByTagName('head')[0],
  123 + api = '/api',
  124 + q = Y.one('#query'),
  125 + script, handleReq;
  126 +
  127 + handleReq = function (host, rule, callback, htmlFormat) {
  128 + callback = callback || 'updateVertical';
  129 + htmlFormat = htmlFormat || 'outerHTML';
  130 + var obj = {
  131 + host: host,
  132 + rule: encodeURIComponent(rule),
  133 + callback: callback,
  134 + htmlFormat: htmlFormat
  135 + };
  136 +
  137 + script = d.createElement('script'),
  138 + script.type = "text/javascript";
  139 + script.src = (function () {
  140 + var params = [];
  141 + Y.each(obj, function (v, k) {
  142 + params.push(k + '=' + v);
  143 + });
  144 + return api + '?' + params.join('&');
  145 + })();
  146 + head.appendChild(script);
  147 + };
  148 +
  149 + q.on('keyup', function (e) {
  150 + var value = q.get('value'),
  151 + query = encodeURIComponent(value.replace(/^\s+|\s+$/g, ''));
  152 +
  153 + if (!query) {
  154 + return ;
  155 + }
  156 +
  157 + // shopping
  158 + handleReq('http://shopping.yahoo.com/search?p=' + query, '#bd .hproducts .summary h2 a');
  159 +
  160 + // apps
  161 + handleReq('http://apps.search.yahoo.com/search?p=' + query, '#main .app-res h3 a');
  162 +
  163 + // finance
  164 + handleReq('http://finance.search.yahoo.com/search?p=' + query, '#web .res h3 a');
  165 +
  166 + // directory
  167 + handleReq('http://dir.search.yahoo.com/search?p=' + query, '#dir .result_mix h3 a');
  168 +
  169 + // video
  170 + handleReq('http://video.search.yahoo.com/video?p=' + query, '#buzz .pane li a span');
  171 +
  172 + // images
  173 + handleReq('http://images.search.yahoo.com/images?p=' + query, '#car-content li .info a');
  174 + });
  175 +
  176 + if (q.get('value')) {
  177 + q.simulate('keyup');
  178 + }
  179 +});
  180 + </script>
  181 +</body>
  182 +</html>
102 demo/profile.html
... ... @@ -0,0 +1,102 @@
  1 +<!DOCTYPE html>
  2 +<html>
  3 +<head>
  4 + <meta http-equiv="content-type" content="text/html; charset=utf-8">
  5 + <title>Huang47's profile</title>
  6 + <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssreset/reset.css" type="text/css">
  7 + <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssfonts/fonts.css" type="text/css">
  8 + <link rel="stylesheet" href="http://yui.yahooapis.com/3.3.0/build/cssgrids/grids.css" type="text/css">
  9 + <link rel="stylesheet" href="http://a1.twimg.com/twitter-mobile/d8fe8d40bf03a3c3029bafcbbe110d9340f93f12/assets/base.css" type="text/css">
  10 + <script src="http://yui.yahooapis.com/3.3.0/build/yui/yui-min.js"></script>
  11 + <style>
  12 + .column { width: 600px; }
  13 + #twitter, #github { float: left; }
  14 + #twitter {
  15 + background: url("") repeat-x scroll 0 0 #C0DEED;
  16 + }
  17 + #tweets-list { width: 520px; margin: 0 auto; }
  18 + .list-tweet .status {
  19 + display: block;
  20 + margin-top: 0.2em;
  21 + }
  22 + .repo label { font-size: 1.2em; }
  23 + .repo .attribute { font-size: 1.5em; }
  24 + .repo {
  25 + padding: 1em;
  26 + border-radius: 10px;
  27 + margin-bottom: 1em;
  28 + background-image: url("");
  29 + background-repeat: repeat-x;
  30 + }
  31 + </style>
  32 +</head>
  33 +<body>
  34 + <div id="twitter" class="column">
  35 + <ul id="tweets-list"></ul>
  36 + </div>
  37 + <div id="github" class="column">
  38 + <ul id="repos"></ul>
  39 + </div>
  40 +
  41 + <script type="text/javascript">
  42 +YUI().use('node', function (Y) {
  43 + var d = document,
  44 + head = document.getElementsByTagName('head')[0],
  45 + api = '/api',
  46 + callback = 'inst',
  47 + htmlFormat = 'outerHTML',
  48 + script, handleReq;
  49 +
  50 + handleReq = function (host, rule) {
  51 + var obj = {
  52 + host: host,
  53 + rule: encodeURIComponent(rule),
  54 + callback: callback,
  55 + htmlFormat: htmlFormat
  56 + };
  57 +
  58 + script = d.createElement('script'),
  59 + script.type = "text/javascript";
  60 + script.src = (function () {
  61 + var params = [];
  62 + Y.each(obj, function (v, k) {
  63 + params.push(k + '=' + v);
  64 + });
  65 + return api + '?' + params.join('&');
  66 + })();
  67 + head.appendChild(script);
  68 + };
  69 +
  70 + window.inst = function (obj) {
  71 + var html = '';
  72 + Y.each(obj.results, function (v, k) {
  73 + html += '<li>' + v + '</li>';
  74 + });
  75 + Y.one('#tweets-list').set('innerHTML', html);
  76 + };
  77 +
  78 + handleReq('http://mobile.twitter.com/huang47', '#tweets-list .list-tweet');
  79 +
  80 + window.github = function (obj) {
  81 + var repos = obj.repositories,
  82 + html = '';
  83 +
  84 + Y.Array.each(repos, function (repo) {
  85 + html += '<li class="repo">';
  86 + Y.each(repo, function (v, k) {
  87 + html += '<label for="' + k + '">' + k + '</label>' +
  88 + '<div name="' + k + '" class="attribute">' + v + '</div>';
  89 + });
  90 + html += '</li>';
  91 + });
  92 + Y.one('#repos').set('innerHTML', html);
  93 + }
  94 +
  95 + script = d.createElement('script'),
  96 + script.type = "text/javascript";
  97 + script.src = 'http://github.com/api/v2/json/repos/show/huang47?sortBy=pushed_at&callback=github';
  98 + head.appendChild(script);
  99 +});
  100 + </script>
  101 +</body>
  102 +</html>
53 demo/server.js
... ... @@ -0,0 +1,53 @@
  1 +#!/usr/bin/env node
  2 +var app = require('express').createServer()
  3 + , YUI = require('yui').YUI
  4 + , cc = require('../css-crawler.js')
  5 + , url = require('url')
  6 + , querystring = require('querystring')
  7 + , fs = require('fs')
  8 + , md = require('markdown')
  9 + , PORT = process.argv[2] || 10633;
  10 +
  11 +app.get('/api', function (req, res) {
  12 + var query = querystring.parse(url.parse(req.url).query)
  13 + , params = query
  14 + , host = query.host
  15 + , rule = query.rule
  16 + , htmlFormat = query.htmlFormat || 'outerHTML'
  17 + , callback = query.callback || 'callback';
  18 +
  19 + cc.fetch({
  20 + host: host,
  21 + rule: rule,
  22 + format: htmlFormat,
  23 + callback: callback
  24 + });
  25 +
  26 + cc.on('data', function (data) {
  27 +// res.header('Content-Type', 'application/json');
  28 + res.send(callback + '(' + JSON.stringify(data) + ')');
  29 + });
  30 +});
  31 +
  32 +app.get('/demo/:page', function (req, res) {
  33 + fs.readFile(req.params.page + '.html', function (err, data) {
  34 + if (err) {
  35 + }
  36 + res.contentType('text/html');
  37 + res.send(data);
  38 + });
  39 +});
  40 +
  41 +app.get('/readme', function (req, res) {
  42 + fs.readFile('./DEMO.md', function (err, data) {
  43 + var output = '';
  44 + if (err) {
  45 + }
  46 + res.contentType('text/html');
  47 + output = md.markdown.toHTML(data.toString());
  48 + res.send(output);
  49 + });
  50 +});
  51 +
  52 +app.listen(PORT);
  53 +console.log('server is running at port ' + PORT);
5 package.json
@@ -2,7 +2,7 @@
2 2 "author": "huang47 <huge.huang@gmail.com> (huang47.blogspot.com)",
3 3 "name": "css-crawler",
4 4 "description": "Crawl web via css selector",
5   - "version": "0.1.9",
  5 + "version": "0.3.1",
6 6 "repository": {
7 7 "type": "git",
8 8 "url": "git://github.com/huang47/css-crawler.git"
@@ -11,8 +11,7 @@
11 11 "node": "*"
12 12 },
13 13 "dependencies": {
14   - "yui3": "0.7.11",
15   - "express": "2.5.1"
  14 + "yui": "*"
16 15 },
17 16 "devDependencies": {},
18 17 "main": "css-crawler"
6 run
... ... @@ -1,12 +1,10 @@
1 1 #!/usr/bin/env node
2 2
3   -var c = require('css-crawler');
  3 +var c = require('./css-crawler.js');
4 4
5 5 c.fetch({
6 6 host: 'http://news.ycombinator.com',
7   - rule: 'table td.title a',
8   - callback: 'callback',
9   - format: 'innerHTML'
  7 + rule: 'table td.title a'
10 8 });
11 9
12 10 c.on('data', function (obj) {

0 comments on commit 3cd3d10

Please sign in to comment.
Something went wrong with that request. Please try again.