Permalink
Browse files

0.0.1

  • Loading branch information...
0 parents commit cd57f02f5e3031152e015ba87eea6650df110bf3 @seanhess seanhess committed Apr 10, 2012
Showing with 333 additions and 0 deletions.
  1. +2 −0 .gitignore
  2. +66 −0 README.md
  3. +179 −0 index.js
  4. +25 −0 package.json
  5. +61 −0 test.js
@@ -0,0 +1,2 @@
+.DS_Store
+node_modules
@@ -0,0 +1,66 @@
+
+xml-events
+==========
+
+This module provides a more usable interface to [node-expat](https://github.com/astro/node-expat)'s fast SAX parser.
+
+
+Installation
+------------
+
+ npm install xml-events
+
+
+Parsing
+-------
+
+Contextual events are fired when the parser finishes parsing a descendant's attributes and text. You can then listen for further descendants on the matched node.
+
+ var xml = require('xml-events')
+ var data = "<root><person age='32'><name>Somebody</name></person></root>"
+ xml.parse(data, function(err, root) {
+
+ // find <person> nodes anywhere in the document
+ root.on('person', function(person) {
+
+ // find <name> nodes anywhere within this <person>
+ person.on('name', function(name) {
+
+ // person is still in scope, so we can access its attributes.
+ console.log("Found person: " + name.text + " with age " + person.attr('age'))
+ })
+ })
+
+ root.onEnd(function() {
+ console.log("Done parsing the document")
+ })
+ })
+
+Streaming
+---------
+
+You can stream data to the parser with `xml.parser`.
+
+ var xml = require('xml-events')
+ var parse = xml.parser(function(err, root) {
+ root.on('name', function(name) {
+ console.log("found name: " + name)
+ })
+ })
+
+ parse("<root><nam")
+ parse("e>bob</nam")
+ parse("e></root>")
+
+If you are using a file stream, just listen to the `data` event and pass each chunk to `parse`:
+
+ var parse = xml.parser(function(err, root) {
+ ...
+ })
+
+ var stream = fs.createReadStream("somefile.xml")
+ stream.on('data', function(data) {
+ parse(data)
+ })
+
+
@@ -0,0 +1,179 @@
+
+// A better wrapper around a sax parser
+
+var expat = require('node-expat')
+var events = require('events')
+var _ = require('underscore')
+
+// The root node is passed to the callback given to parse / parser.
+// You can call 'on' and 'onEnd' on it:
+// - node.on('someNode', function(node) { ... })
+// Nodes delivered to an 'on' listener support 'on' and 'onEnd' too.
+
+function Node(name, attrs, text) {
+ var emitter = new events.EventEmitter(),
+ self = this
+
+ self.name = name
+ self.attrs = attrs || {}
+ self.text = text || ""
+
+ // by default the max listeners is 10, this won't actually limit the listeners,
+ // but print warnings for each listener added after 10. 0 is unlimited.
+
+ emitter.setMaxListeners(0)
+
+ _(self).extend(events.EventEmitter)
+
+ self.on = function(name, cb) {
+ emitter.on(name, cb)
+ }
+
+ // Listen for when this node ends
+ self.onEnd = function(cb) {
+ emitter.on(Node.EndedEvent, cb)
+ }
+
+ // internal. when the node has closed.
+ self.ended = function() {
+ emitter.emit(Node.EndedEvent)
+ }
+
+ // interal. when we found a node inside it
+ self.foundDescendant = function(node) {
+ emitter.emit(node.name, node)
+ }
+
+ // Returns an attribute
+ self.attr = function(name) {
+ return self.attrs[name]
+ }
+
+ // Just for fun
+ self.toString = function() {
+ var out = "<" + self.name
+
+ _(self.attrs).each(function(value, name) {
+ out += " " + name + '="' + value + '"'
+ })
+ out += ">" + self.text + "</" + self.name + ">"
+ return out
+ }
+ return self
+}
+
+Node.EndedEvent = '_ended'
+// Returns a parser you can feed things to
+
+exports.parser = function(cb) {
+ var parents = [],
+ currentNode,
+ parser,
+ sendUnsentNode,
+ unsentNode
+
+ var calledCallback = false
+
+ function callback(err, root) {
+ calledCallback = true
+ cb(err, root)
+ }
+
+ // Send foundNode to every parent along the way
+ // Not to yourself though
+
+ function sendUnsentNode() {
+ var _i, _len, _ref, parent
+ if (typeof unsentNode !== "undefined" && unsentNode !== null) {
+ _ref = parents
+ for (_i = 0, _len = _ref.length; _i < _len; _i++) {
+ parent = _ref[_i]
+ parent.foundDescendant(unsentNode)
+ }
+ return (unsentNode = null)
+ }
+ }
+
+ // Set up the parser
+ parser = new expat.Parser("UTF-8")
+
+ parser.on('startElement', function(name, attrs) {
+ if (currentNode) {
+ parents.push(currentNode)
+ sendUnsentNode()
+ }
+ currentNode = new Node(name, attrs)
+ unsentNode = currentNode
+ // Send back the root
+ if (parents.length === 0)
+ callback(null, currentNode)
+ })
+
+ parser.on('endElement', function(name) {
+
+ // sys.puts "Ending #{name} (#{currentNode})"
+ sendUnsentNode()
+ currentNode.ended()
+ currentNode = parents.pop()
+ })
+
+ parser.on('text', function(text) {
+ currentNode.text = currentNode.text || ""
+ currentNode.text += text;
+ // sendUnsentNode()
+ });
+
+ // parser.addListener 'processingInstruction', (target, data) ->
+ // parser.addListener 'comment', (comment) ->
+ // parser.addListener 'xmlDeclaration', (version, encoding, standalone) ->
+
+ return function(somethingWithToString) {
+
+
+ if (!somethingWithToString) return callback(new Error("PARSE ERROR - string was null"))
+
+ // somethingWithToString.toString() because string might be a buffer
+ var result = parser.parse(somethingWithToString.toString())
+
+ if (!result)
+ callback(new Error("PARSE ERROR: " + parser.getError() + " For String: ((( " + somethingWithToString + " )))"))
+
+ return calledCallback
+ }
+}
+
+// Parse a string
+// cb(err, root)
+// If you want to know when we're done parsing, do:
+//   xml.parse(string, function(err, root) {
+//     root.onEnd(function() { console.log("Ended") })
+//   })
+//
+// Normal Example
+//   xml.parse(string, function(err, root) {
+//     root.on('somenode', function(node) {
+//       node.on('subnode', function(node) {
+//         console.log(node.text)
+//         console.log(node.attr('someattribute'))
+//       })
+//     })
+//     root.onEnd(function() { console.log("Ended") })
+//   })
+
+exports.parse = function(string, cb) {
+
+ // this temporary until we can get it to call back more than once and then we will know the xml where it breaks
+ var callbacked = false
+ function callback (err, data) {
+ if (callbacked) {
+ // this would throw an error anyway, now we just add the xml to
+ throw new Error("Parser called back multiple times on xml: " + string)
+ }
+ else {
+ callbacked = true
+ cb(err, data)
+ }
+ }
+
+ var parse = exports.parser(callback)
+ var result = parse(string)
+
+ if (!result) callback(new Error("parser.parse never finished: " + string))
+ // if not result then cb new Error "Could not parse" else null
+}
@@ -0,0 +1,25 @@
+{
+ "author": "Sean Hess <sean@i.tv>",
+ "name": "xml-events",
+ "description": "An event-driven syntax for parsing xml streams with node-expat",
+ "version": "0.0.1",
+ "repository": {
+ "type": "git",
+ "url": "git://github.com/idottv/node-xml-events.git"
+ },
+ "main": "index.js",
+ "scripts":{
+ "test":"node_modules/.bin/mocha test.js"
+ },
+ "dependencies": {
+ "node-expat":"1.5.0",
+ "underscore":"1.3.3"
+ },
+ "devDependencies": {
+ "mocha":"1.0.1"
+ },
+ "optionalDependencies": {},
+ "engines": {
+ "node": "*"
+ }
+}
@@ -0,0 +1,61 @@
+
+var sys = require('util')
+var xml = require('./index')
+var assert = require('assert')
+
+describe('xml-events', function() {
+ it('should return an error for a null string', function(done) {
+ var parse = xml.parser(function(err, root) {
+ assert.ok(err)
+ done()
+ })
+ parse(null)
+ })
+
+ it('should error if not complete', function(done) {
+ xml.parse("<root", function(err, root) {
+ assert.ok(err, "Parser should have returned an error on parse if incomplete")
+ done()
+ })
+ })
+
+ it('should parse some xml', function(done) {
+
+ xml.parse("<root><one>hello</one><two att='value'/></root>", function(err, root) {
+
+ assert.ifError(err)
+ assert.ok(root.toString())
+
+ root.on('one', function(node) {
+ assert.equal(node.text, "hello")
+
+ root.on('two', function(node) {
+ assert.equal(node.attr('att'), "value")
+
+ root.onEnd(function() {
+ done()
+ })
+ })
+ })
+ })
+ })
+
+ it('should parse a stream', function(done) {
+ var parse = xml.parser(function(err, root) {
+ var names = []
+ root.on('name', function(name) {
+ names.push(name.text)
+ })
+ root.onEnd(function() {
+ assert.equal(names.length, 4)
+ done()
+ })
+ })
+
+ parse("<roo")
+ parse("t><name>henry</name><name>bo")
+ parse("b</name><name>john</na")
+ parse("me><name>will</name></root>")
+ })
+})
+

0 comments on commit cd57f02

Please sign in to comment.