Skip to content

HTTPS clone URL

Subversion checkout URL

You can clone with
or
.
Download ZIP

Loading…

Added support for tags like schemaSpec #14

Closed
wants to merge 2 commits into from

1 participant

@Siedrix

I'm using Cheerio to parse TEI files. TEI is a subset of XML, and I need to be able to parse tags like schemaSpec, titleStmt, and sourceDesc.

I made the change to the regex and it works great, though it may be slower.

@Siedrix Siedrix closed this
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Commits on Jan 5, 2012
  1. @Siedrix
Commits on Apr 28, 2012
  1. @Siedrix
This page is out of date. Refresh to see the latest.
Showing with 35 additions and 14 deletions.
  1. +35 −14 lib/soupselect.js
View
49 lib/soupselect.js
@@ -5,21 +5,20 @@ http://www.opensource.org/licenses/mit-license.php
MIT licensed http://www.opensource.org/licenses/mit-license.php
*/
-var domUtils = require("htmlparser").DomUtils;
-var sys = require('sys');
+var domUtils = require("htmlparser2").DomUtils;
-var tagRe = /^[a-z0-9]+$/;
+var tagRe = /^[a-zA-Z0-9]+$/;
/*
- /^(\w+)?\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/
- \---/ \---/\-------------/ \-------/
- | | | |
- | | | The value
+ /^(\w+)?\[(\w+)([=~\|\^\$\*]?)=?["']?([^\]"']*)["']?\]$/
+ \---/ \---/ \-------------/ \--------/
+ | | | |
+ | | | The value
| | ~,|,^,$,* or =
| Attribute
Tag
*/
-var attrSelectRe = /^(\w+)?\[(\w+)([=~\|\^\$\*]?)=?"?([^\]"]*)"?\]$/;
+var attrSelectRe = /^(\w+)?\[(\w+)([=~\|\^\$\*]?)=?["']?([^\]"']*)["']?\]$/;
/**
Takes an operator and a value and returns a function which can be used to
@@ -54,13 +53,20 @@ a valid dom tree, so can be passed by into
htmlparser.DomUtil.* calls
*/
exports.select = function(dom, selector) {
+ //console.log('starting with', selector, dom)
+
var currentContext = [dom];
- var found, tag, options;
+ var hacked, found, tag, options, foundNodes = [];
+
var tokens = selector.split(/\s+/);
+
+ if(selector.search(',') >= 0){
+ hacked = true;
+ tokens = selector.split(',')
+ }
for ( var i = 0; i < tokens.length; i++ ) {
-
// Attribute selectors
var match = attrSelectRe.exec(tokens[i]);
if ( match ) {
@@ -157,6 +163,16 @@ exports.select = function(dom, selector) {
break;
}
+ if(hacked){
+ //console.log('selector', tokens[i], typeof currentContext[0]);
+ var nodesFromTag = domUtils.getElementsByTagName(tokens[i], currentContext[0]);
+ //console.log('n of foundNodes', tokens[i], nodesFromTag.length);
+ nodesFromTag.forEach(function(item){
+ item.nodeName = tokens[i];
+ foundNodes.push(item);
+ });
+ }
+
found = [];
for ( var m = 0; m < currentContext.length; m++ ) {
// htmlparsers document itself has no child property - only nodes do...
@@ -165,12 +181,17 @@ exports.select = function(dom, selector) {
} else if (i === 0) {
found = found.concat(domUtils.getElementsByTagName(tokens[i], currentContext[m]));
}
-
};
- currentContext = found;
+ if(!hacked){
+ currentContext = found;
+ }
}
};
-
- return currentContext;
+
+ if(!hacked){
+ return currentContext;
+ }else{
+ return foundNodes;
+ }
};
Something went wrong with that request. Please try again.