Skip to content

Commit

Permalink
First commit
Browse files Browse the repository at this point in the history
  • Loading branch information
ibrow committed May 21, 2010
0 parents commit 9b19846
Show file tree
Hide file tree
Showing 6 changed files with 5,653 additions and 0 deletions.
29 changes: 29 additions & 0 deletions LICENSE
@@ -0,0 +1,29 @@
----------------------------------------------------------------------
node-rss is released under the MIT License

Copyright (c) 2010 Rob Searles - http://www.robsearles.com

Permission is hereby granted, free of charge, to any person
obtaining a copy of this software and associated documentation
files (the "Software"), to deal in the Software without
restriction, including without limitation the rights to use,
copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following
conditions:

The above copyright notice and this permission notice shall be
included in all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
OTHER DEALINGS IN THE SOFTWARE.
----------------------------------------------------------------------
node-xml, which node-rss makes heavy use of, is also released under the
MIT License - see http://github.com/robrighter/node-xml for more info
----------------------------------------------------------------------
40 changes: 40 additions & 0 deletions README
@@ -0,0 +1,40 @@
----------------------------------------------------------------------
node-rss - an RSS parser for node.
http://github.com/ibrow/node-rss
Rob Searles - http://www.robsearles.com
----------------------------------------------------------------------
node-rss makes heavy use of the node-xml module written by
Rob Righter - @robrighter
http://github.com/robrighter/node-xml
----------------------------------------------------------------------
node-rss is released under the MIT licence
----------------------------------------------------------------------
See example.js for working examples of node-rss

----------------------------------------------------------------------
TODO
----------------------------------------------------------------------
Lots, mainly:
- error checking
- writing tests
- make parsing more robust
- conform to all specifications

----------------------------------------------------------------------
HISTORY
----------------------------------------------------------------------
21 May 2010
Initial release, working on v0.1.95-17-g1036aa9
----------------------------------------------------------------------

----------------------------------------------------------------------
REFERENCE
----------------------------------------------------------------------
RSS 2.0 specification
http://cyber.law.harvard.edu/rss/rss.html

RSS 1.0 specification
http://web.resource.org/rss/1.0/spec

Atom 1.0 specification
http://atompub.org/2005/07/11/draft-ietf-atompub-format-10.html
45 changes: 45 additions & 0 deletions example.js
@@ -0,0 +1,45 @@
/**********************************************************************
example.js
Example of the node-rss feed parser
**********************************************************************/
var sys = require('sys');
var rss = require('./node-rss');


/**********************************************************************
Example One:
Getting a remote RSS feed and parsing
rss.parseURL(feed_url, callback);
**********************************************************************/
// URL of the feed you want to parse
var feed_url = 'http://feeds.feedburner.com/github';

// parseURL() delivers its result through the callback only; it has no
// return value worth keeping.
rss.parseURL(feed_url, function(articles) {
  sys.puts(articles.length);
  // declare the loop index locally so it does not leak a global `i`
  for (var i = 0; i < articles.length; i++) {
    sys.puts("Article: "+i+", "+
      articles[i].title+"\n"+
      articles[i].link+"\n"+
      articles[i].description+"\n"+
      articles[i].content
    );
  }
});

/**********************************************************************
Example Two:
Getting a local RSS feed and parsing
rss.parseFile(feed_file, callback);
**********************************************************************/
// parseFile() delivers its result through the callback only; it has no
// return value worth keeping.
rss.parseFile('nodeblogs.com.feed.xml', function(articles) {
  sys.puts(articles.length);
  // declare the loop index locally so it does not leak a global `i`
  for (var i = 0; i < articles.length; i++) {
    sys.puts("Article: "+i+", "+
      articles[i].title+"\n"+
      articles[i].link+"\n"+
      articles[i].description+"\n"+
      articles[i].content
    );
  }
});
171 changes: 171 additions & 0 deletions node-rss.js
@@ -0,0 +1,171 @@
/**********************************************************************
node-rss - an RSS parser for node.
http://github.com/ibrow/node-rss
Copyright (c) 2010 Rob Searles
http://www.robsearles.com
node-rss is released under the MIT license
- see LICENSE for more info
*********************************************************************
node-rss makes heavy use of the node-xml module written by
Rob Righter - @robrighter
http://github.com/robrighter/node-xml
**********************************************************************/
var sys = require('sys'), http = require('http');
var xml = require("./node-xml");

// variable for holding the callback function which is passed to the
// exported function. This callback is passed the articles array.
// NOTE: this is shared module-level state - every call to parseFile()
// or parseURL() overwrites it, and the parser invokes whichever callback
// is stored here at the moment a document finishes parsing.
var callback = function() {};

// The main "meat" of this module - a single shared SAX parser that
// collects the articles of a feed (<item> or <entry> elements) and
// passes them to the module-level `callback` when the document ends.
// using node-xml: http://github.com/robrighter/node-xml
var parser = new xml.SaxParser(function(cb) {
  // articles collected for the document currently being parsed
  var articles = [];
  // lower-cased name of the most recently opened element, or false
  var current_element = false;
  // index into `articles` of the article currently being filled in
  var article_count = 0;
  // true while inside an <item>/<entry> element
  var in_item = false;
  // text accumulated since the last closing tag inside an article
  var current_chars = '';

  // Start from a clean slate. The parser instance is reused for every
  // feed, so without this a second parse would append to the articles
  // of the first one.
  function resetState() {
    articles = [];
    current_element = false;
    article_count = 0;
    in_item = false;
    current_chars = '';
  }

  // strip leading and trailing whitespace
  function trim(str) {
    return str.replace(/^\s+/, '').replace(/\s+$/, '');
  }

  // true for the element names that wrap a single article
  function isArticleElement(name) {
    return name === 'item' || name === 'entry';
  }

  // clears anything left over from a parse that never reached its end
  cb.onStartDocument(resetState);

  // when finished parsing the RSS feed, trigger the callback
  cb.onEndDocument(function() {
    // detach the result before resetting so the caller's array is not
    // touched by any later parse
    var finished = articles;
    resetState();
    callback(finished);
  });

  // track what element we are currently in. If it is an <item> or
  // <entry> this is an article, add a container to the list of articles
  cb.onStartElementNS(function(elem, attrs, prefix, uri, namespaces) {
    current_element = elem.toLowerCase();
    if (isArticleElement(current_element)) {
      in_item = true;
      // a plain object rather than an Array: the fields are stored as
      // named properties, which an Array would silently drop when
      // serialized with JSON.stringify
      articles[article_count] = {};
    }
  });

  // when we are at the end of an element, save its related content
  cb.onEndElementNS(function(elem, prefix, uri) {
    if (!in_item) {
      return;
    }

    switch (current_element) {
      case 'description':
      case 'summary':
      case 'link':
      case 'title':
        articles[article_count][current_element] = trim(current_chars);
        break;
      case 'content':
      case 'encoded': // feedburner is <content:encoded>, node-xml reads as <encoded>
        articles[article_count].content = trim(current_chars);
        break;
    }

    current_element = false;
    current_chars = '';

    // compare case-insensitively for both names, matching the check
    // made when the element was opened
    if (isArticleElement(elem.toLowerCase())) {
      in_item = false;
      article_count++;
    }
  });

  // plain text and CDATA sections are both treated as element content
  cb.onCharacters(addContent);
  cb.onCdata(addContent);
  function addContent(chars) {
    if (in_item) {
      current_chars += chars;
    }
  }

  // @TODO handle warnings and errors properly
  cb.onWarning(function(msg) {
    sys.puts('<WARNING>'+msg+"</WARNING>");
  });
  cb.onError(function(msg) {
    sys.puts('<ERROR>'+JSON.stringify(msg)+"</ERROR>");
  });
});


/**
* parseFile()
* Parses an RSS feed from a file.
* @param file - path to the RSS feed file
* @param cb - callback function to be triggered at end of parsing
*/
exports.parseFile = function(file, cb) {
callback = cb;
parser.parseFile(file);
}
/**
* parseURL()
* Parses an RSS feed from a URL.
* @param url - URL of the RSS feed file
* @param cb - callback function to be triggered at end of parsing
*
* @TODO - decent error checking
*/
exports.parseURL = function(url, cb) {
callback = cb;

get_rss(url);
function get_rss(url) {
var u = require('url'), http = require('http');
var parts = u.parse(url);
//sys.puts(JSON.stringify(parts));

// set the default port to 80
if(!parts.port) { parts.port = 80; }


var redirection_level = 0;
var client = http.createClient(parts.port, parts.hostname);
var request = client.request('GET', parts.pathname, {'host': parts.hostname});
request.addListener('response', function (response) {
//sys.puts('STATUS: ' + response.statusCode);
//sys.puts('HEADERS: ' + JSON.stringify(response.headers));

// check to see the type of status
switch(response.statusCode) {
// check for ALL OK
case 200:
var body = '';
response.addListener('data', function (chunk) {
body += chunk;
});
response.addListener('end', function() {
parser.parseString(body);
});
break;
// redirect status returned
case 301:
case 302:
if(redirection_level > 10) {
sys.puts("too many redirects");
}
else {
sys.puts("redirect to "+response.headers.location);
get_rss(response.headers.location);
}
break;
default:
/*
response.setEncoding('utf8');
response.addListener('data', function (chunk) {
//sys.puts('BODY: ' + chunk);
});
*/
break;
}
});
request.end();
}
};

0 comments on commit 9b19846

Please sign in to comment.