Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with HTTPS or Subversion.

Download ZIP

Loading…

Added support for tags like schemaSpec #14

Closed
wants to merge 2 commits into from

1 participant

Siedrix
Siedrix

I'm using Cheerio to parse TEI files. TEI is an XML-based format, and I need to be able to parse mixed-case tags such as schemaSpec, titleStmt, and sourceDesc.

I made the change to the regex and it works great, although it may be slower.

Siedrix Siedrix closed this
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Jan 5, 2012
  1. Siedrix
Commits on Apr 28, 2012
  1. Siedrix
This page is out of date. Refresh to see the latest.
Showing with 35 additions and 14 deletions.
  1. +35 −14 lib/soupselect.js
49 lib/soupselect.js
View
@@ -5,21 +5,20 @@ http://www.opensource.org/licenses/mit-license.php
MIT licensed http://www.opensource.org/licenses/mit-license.php
*/
-var domUtils = require("htmlparser").DomUtils;
-var sys = require('sys');
+var domUtils = require("htmlparser2").DomUtils;
-var tagRe = /^[a-z0-9]+$/;
+var tagRe = /^[a-zA-Z0-9]+$/;
/*
- /^(\w+)?\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
- \---/ \---/\-------------/ \-------/
- | | | |
- | | | The value
+ /^(\w+)?\[(\w+)([=~\|\^\$\*]?)=?["']?([^\]"']*)["']?\]$/
+ \---/ \---/ \-------------/ \--------/
+ | | | |
+ | | | The value
| | ~,|,^,$,* or =
| Attribute
Tag
*/
-var attrSelectRe = /^(\w+)?\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/;
+var attrSelectRe = /^(\w+)?\[(\w+)([=~\|\^\$\*]?)=?["']?([^\]"']*)["']?\]$/;
/**
Takes an operator and a value and returns a function which can be used to
@@ -54,13 +53,20 @@ a valid dom tree, so can be passed by into
htmlparser.DomUtil.* calls
*/
exports.select = function(dom, selector) {
+ //console.log('starting with', selector, dom)
+
var currentContext = [dom];
- var found, tag, options;
+ var hacked, found, tag, options, foundNodes = [];
+
var tokens = selector.split(/\s+/);
+
+ if(selector.search(',') >= 0){
+ hacked = true;
+ tokens = selector.split(',')
+ }
for ( var i = 0; i < tokens.length; i++ ) {
-
// Attribute selectors
var match = attrSelectRe.exec(tokens[i]);
if ( match ) {
@@ -157,6 +163,16 @@ exports.select = function(dom, selector) {
break;
}
+ if(hacked){
+ //console.log('selector', tokens[i], typeof currentContext[0]);
+ var nodesFromTag = domUtils.getElementsByTagName(tokens[i], currentContext[0]);
+ //console.log('n of foundNodes', tokens[i], nodesFromTag.length);
+ nodesFromTag.forEach(function(item){
+ item.nodeName = tokens[i];
+ foundNodes.push(item);
+ });
+ }
+
found = [];
for ( var m = 0; m < currentContext.length; m++ ) {
// htmlparsers document itself has no child property - only nodes do...
@@ -165,12 +181,17 @@ exports.select = function(dom, selector) {
} else if (i === 0) {
found = found.concat(domUtils.getElementsByTagName(tokens[i], currentContext[m]));
}
-
};
- currentContext = found;
+ if(!hacked){
+ currentContext = found;
+ }
}
};
-
- return currentContext;
+
+ if(!hacked){
+ return currentContext;
+ }else{
+ return foundNodes;
+ }
};
Something went wrong with that request. Please try again.