Skip to content
This repository

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP
Browse code

Put the uppercasing of tags back, and make it configurable.

  • Loading branch information...
commit 6ce93ba630005de7353f55ec4538fd08d79b4f42 1 parent d5cfdb0
Isaac Z. Schlueter authored February 10, 2010
48  README.md
Source Rendered
@@ -29,6 +29,20 @@ browser or other CommonJS implementations.
29 29
     
30 30
     parser.write('<xml>Hello, <who name="world">world</who>!</xml>').close();
31 31
 
  32
+## Arguments
  33
+
  34
+Pass the following arguments to the parser function.  All are optional.
  35
+
  36
+`strict` - Boolean. Whether or not to be a jerk. Default: `false`.
  37
+
  38
+`opt` - Object bag of settings regarding string formatting.  All default to `false`.
  39
+Settings supported:
  40
+
  41
+* `trim` - Boolean. Whether or not to trim text and comment nodes.
  42
+* `normalize` - Boolean. If true, then turn any whitespace into a single space.
  43
+* `lowercasetags` - Boolean. If true, then lowercase tags in loose mode, rather
  44
+  than uppercasing them.
  45
+
32 46
 ## Methods
33 47
 
34 48
 `write` - Write bytes onto the stream. You don't have to do this all at once. You
@@ -37,9 +51,27 @@ can keep writing as much as you want.
37 51
 `close` - Close the stream. Once closed, no more data may be written until it is
38 52
 done processing the buffer, which is signaled by the `end` event.
39 53
 
  54
+## Members
  55
+
  56
+At all times, the parser object will have the following members:
  57
+
  58
+`line`, `column`, `position` - Indications of the position in the XML document where
  59
+the parser currently is looking.
  60
+
  61
+`closed` - Boolean indicating whether the parser is closed to further writes.  If it's
  62
+`true`, then wait for the `ready` event to write again.
  63
+
  64
+`strict` - Boolean indicating whether or not the parser is a jerk.
  65
+
  66
+`opt` - The options you passed into the constructor (or the defaults.)
  67
+
  68
+And a bunch of other stuff that you probably shouldn't touch.
  69
+
40 70
 ## Events
41 71
 
42  
-All events emit with a single argument.  To listen to an event, assign a function to `on<eventname>`.  Functions get executed in the this-context of the parser object.
  72
+All events emit with a single argument. To listen to an event, assign a function to
  73
+`on<eventname>`. Functions get executed in the this-context of the parser object.
  74
+The list of supported events is also in the exported `EVENTS` array.
43 75
 
44 76
 `error` - Indication that something bad happened. The error will be hanging out on
45 77
 `parser.error`, and must be deleted before parsing can continue. By listening to
@@ -54,6 +86,10 @@ more in strict mode. Argument: instance of `Error`.
54 86
 `name` and `body` members. Attributes are not parsed, as processing instructions
55 87
 have implementation dependent semantics.
56 88
 
  89
+`sgmldeclaration` - Random SGML declarations.  Stuff like `<!ENTITY p>` would trigger
  90
+this kind of event.  This is a weird thing to support, so it might go away at some
  91
+point.  SAX isn't intended to be used to parse SGML, after all.
  92
+
57 93
 `opentag` - An opening tag. Argument: object with `name` and `attributes`. In
58 94
 non-strict mode, tag names are uppercased.
59 95
 
@@ -65,10 +101,12 @@ will have `closeTag` emitted immediately after `openTag`.  Argument: tag name.
65 101
 
66 102
 `comment` - A comment node.  Argument: the string of the comment.
67 103
 
68  
-`end` - Indication that the closed stream is complete, and ready to start parsing
69  
-a new XML document.
  104
+`cdata` - A `<![CDATA[` block.  Argument: the string of random character data.
  105
+
  106
+`end` - Indication that the closed stream has ended.
  107
+
  108
+`ready` - Indication that the stream has reset, and is ready to be written to.
70 109
 
71 110
 ## Todo
72 111
 
73  
-It'd be better if it parsed character classes in attribute names and values.  
74  
-Right now it doesn't.
  112
+Build an HTML parser on top of this.
2  examples/example.js
@@ -21,7 +21,7 @@ xml.addCallback(function (xml) {
21 21
   });
22 22
   loose.onend = function () {
23 23
     sys.error("end");
24  
-    // sys.error(sys.inspect(loose));
  24
+    sys.error(sys.inspect(loose));
25 25
   };
26 26
   
27 27
   // do this one char at a time to verify that it works.
25  lib/sax.js
@@ -9,7 +9,8 @@ function SAXParser (strict, opt) {
9 9
     this.procInstName = this.procInstBody = this.entity =
10 10
     this.attribName = this.attribValue = this.q =
11 11
     this.cdata = this.sgmlDecl = "";
12  
-  this.opt = opt || {trim:false};
  12
+  this.opt = opt || {};
  13
+  this.tagCase = this.opt.lowercasetags ? "toLowerCase" : "toUpperCase";
13 14
   this.tags = [];
14 15
   this.closedRoot = this.sawRoot = false;
15 16
   this.tag = this.error = null;
@@ -127,11 +128,14 @@ function emitNode (parser, nodeType, data) {
127 128
   emit(parser, nodeType, data);
128 129
 }
129 130
 function closeText (parser) {
130  
-  if (parser.opt.trim) parser.textNode = parser.textNode.trim();
  131
+  parser.textNode = textopts(parser.opt, parser.textNode);
131 132
   if (parser.textNode) emit(parser, "ontext", parser.textNode);
132 133
   parser.textNode = "";
133 134
 }
134  
-
  135
+function textopts (opt, text) {
  136
+  if (opt.trim) text = text.trim();
  137
+  if (opt.normalize) text = text.replace(/\s+/g, " ");
  138
+}
135 139
 function error (parser, er) {
136 140
   closeText(parser);
137 141
   er += "\nLine: "+parser.line+
@@ -142,13 +146,12 @@ function error (parser, er) {
142 146
   parser.error = er;
143 147
   emit(parser, "onerror", er);
144 148
 }
145  
-
146 149
 function end (parser) {
147 150
   if (parser.state !== S.TEXT) error(parser, "Unexpected end");
148 151
   closeText(parser);
149 152
   parser.c = "";
150 153
   emit(parser, "onend");
151  
-  SAXParser.call(parser, parser.strict);
  154
+  SAXParser.call(parser, parser.strict, parser.opt);
152 155
 }
153 156
 function strictFail (parser, message) {
154 157
   if (parser.strict) error(parser, message);
@@ -163,10 +166,8 @@ function chomp (parser) {
163 166
   } else parser.column ++;
164 167
 }
165 168
 
166  
-///// Jump 
167  
-
  169
+///// Jump http://www.youtube.com/watch?v=jcrzCdNFpT8
168 170
 function trampoline (parser) { T[S[parser.state]](parser, parser.c) }
169  
-// http://www.youtube.com/watch?v=jcrzCdNFpT8
170 171
 T.BEGIN = function (parser, c) {
171 172
   if (c === "<") parser.state = S.OPEN_WAKA;
172 173
   else if (not(whitespace,c)) {
@@ -180,13 +181,11 @@ T.BEGIN = function (parser, c) {
180 181
 T.TEXT = function (parser, c) {
181 182
   if (c === "<") parser.state = S.OPEN_WAKA;
182 183
   else if (not(whitespace, c) && (!parser.sawRoot || parser.closedRoot)) {
183  
-    // non-whitespace or comments or procinsts after the root.
184 184
     strictFail("Text data outside of root node.");
185 185
   }
186 186
   else if (c === "&") parser.state = S.TEXT_ENTITY;
187 187
   else parser.textNode += c;
188 188
 }
189  
-
190 189
 T.OPEN_WAKA = function (parser, c) {
191 190
   // either a /, ?, !, or text is coming next.
192 191
   if (c === "!") {
@@ -209,7 +208,6 @@ T.OPEN_WAKA = function (parser, c) {
209 208
     parser.state = S.TEXT;
210 209
   }
211 210
 }
212  
-// Comments, Doctypes, Cdata, mostly
213 211
 T.SGML_DECL = function (parser, c) {
214 212
   if ((parser.sgmlDecl+c).toUpperCase() === CDATA) {
215 213
     parser.state = S.CDATA;
@@ -244,7 +242,8 @@ T.COMMENT = function (parser, c) {
244 242
 T.COMMENT_ENDING = function (parser, c) {
245 243
   if (c === "-") {
246 244
     parser.state = S.COMMENT_ENDED;
247  
-    emitNode(parser, "oncomment", parser.comment);
  245
+    parser.comment = textopts(parser.opt, parser.comment);
  246
+    if (parser.comment) emitNode(parser, "oncomment", parser.comment);
248 247
     parser.comment = "";
249 248
   } else {
250 249
     strictFail(parser, "Invalid comment");
@@ -311,6 +310,7 @@ T.PROC_INST_QUOTED = function (parser, c) {
311 310
   }
312 311
 }
313 312
 function newTag (parser) {
  313
+  if (!parser.strict) parser.tagName = parser.tagName[parser.tagCase]();
314 314
   parser.tag = { name : parser.tagName, attributes : {} };
315 315
 }
316 316
 function openTag (parser) {
@@ -422,6 +422,7 @@ function closeTag (parser) {
422 422
     return;
423 423
   }
424 424
   do {
  425
+    if (!parser.strict) parser.tagName = parser.tagName[parser.tagCase]();
425 426
     var closeTo = parser.tagName, close = parser.tags.pop();
426 427
     if (closeTo !== close.name) {
427 428
       strictFail(parser, "Unexpected close tag.");

0 notes on commit 6ce93ba

Please sign in to comment.
Something went wrong with that request. Please try again.