Permalink
Browse files

Eliminate .forEach loops for a small performance gain

  • Loading branch information...
1 parent cde2120 commit 1b6c77bdf4426acd005c6c68d2f707a060a7382e Harry Fuecks committed Oct 21, 2010
Showing with 93 additions and 102 deletions.
  1. +93 −102 lib/soupselect.js
View
@@ -59,122 +59,113 @@ exports.select = function(dom, selector) {
var tokens = selector.split(/\s+/);
- if (typeof StopIteration === "undefined") {
- StopIteration = new Error("StopIteration");
- }
-
- try {
- tokens.forEach(function(token) {
-
- // Attribute selectors
- var match = attrSelectRe.exec(token);
- if ( match ) {
- var attribute = match[2], operator = match[3], value = match[4];
- tag = match[1];
- options = {};
- options[attribute] = makeValueChecker(operator, value);
-
- found = [];
- currentContext.forEach(function(context) {
- found = found.concat(domUtils.getElements(options, context));
- });
-
- if ( tag ) {
- // Filter to only those matching the tag name
- found = domUtils.getElements({ 'tag_name': tag }, found, false);
- }
+ for ( var i = 0; i < tokens.length; i++ ) {
+
+ // Attribute selectors
+ var match = attrSelectRe.exec(tokens[i]);
+ if ( match ) {
+ var attribute = match[2], operator = match[3], value = match[4];
+ tag = match[1];
+ options = {};
+ options[attribute] = makeValueChecker(operator, value);
- currentContext = found;
+ found = [];
+ for (var j = 0; j < currentContext.length; j++ ) {
+ found = found.concat(domUtils.getElements(options, currentContext[j]));
+ };
- }
+ if ( tag ) {
+ // Filter to only those matching the tag name
+ found = domUtils.getElements({ 'tag_name': tag }, found, false);
+ }
- // ID selector
- else if ( token.indexOf('#') !== -1 ) {
- found = [];
-
- var id_selector = token.split('#', 2)[1];
+ currentContext = found;
+
+ }
+
+ // ID selector
+ else if ( tokens[i].indexOf('#') !== -1 ) {
+ found = [];
+
+ var id_selector = tokens[i].split('#', 2)[1];
+
+ // need to stop on the first id found (in bad HTML)...
+ var el = null;
+ for ( var k = 0; k < currentContext.length; k++ ) {
- // uglier construct but need to stop on the first id found (in bad HTML)
- // while avoiding additional StopIterations for efficiency
- var el = null;
- for ( var i = 0; i < currentContext.length; i++ ) {
-
- // the document has no child elements but tags do so we search children to avoid
- // returning the current element via a false positive
- if ( typeof currentContext[i].children !== 'undefined' ) {
- el = domUtils.getElementById(id_selector, currentContext[i].children);
- } else {
- el = domUtils.getElementById(id_selector, currentContext[i]);
- }
-
- if ( el ) {
- found.push(el);
- break;
- }
+ // the document has no child elements but tags do so we search children to avoid
+ // returning the current element via a false positive
+ if ( typeof currentContext[k].children !== 'undefined' ) {
+ el = domUtils.getElementById(id_selector, currentContext[k].children);
+ } else {
+ el = domUtils.getElementById(id_selector, currentContext[k]);
}
-
- if (!found[0]) {
- currentContext = [];
- throw StopIteration;
+
+ if ( el ) {
+ found.push(el);
+ break;
}
-
- currentContext = found;
}
- // Class selector
- else if ( token.indexOf('.') !== -1 ) {
- var parts = token.split('.', 2);
- tag = parts[0];
- options = {};
- options['class'] = function (value) {
- return (value && value.split(/\s+/).indexOf(parts[1]) > -1);
- };
-
- found = [];
- currentContext.forEach(function(context) {
- if ( tag.length > 0 ) {
- context = domUtils.getElementsByTagName(tag, context);
- // don't recurse in the case we have a tag or we get children we might not want
- found = found.concat(domUtils.getElements(options, context, false));
- } else {
- found = found.concat(domUtils.getElements(options, context));
- }
-
- });
-
- currentContext = found;
+ if (!found[0]) {
+ currentContext = [];
+ break;
}
- // Star selector
- else if ( token === '*' ) {
- // nothing to do right?
- }
+ currentContext = found;
+ }
+
+ // Class selector
+ else if ( tokens[i].indexOf('.') !== -1 ) {
+ var parts = tokens[i].split('.', 2);
+ tag = parts[0];
+ options = {};
+ options['class'] = function (value) {
+ return (value && value.split(/\s+/).indexOf(parts[1]) > -1);
+ };
- // Tag selector
- else {
- if (!tagRe.test(token)) {
- currentContext = [];
- throw StopIteration;
+ found = [];
+ for ( var l = 0; l < currentContext.length; l++ ) {
+ var context = currentContext[l];
+ if ( tag.length > 0 ) {
+ context = domUtils.getElementsByTagName(tag, context);
+ // don't recurse in the case we have a tag or we get children we might not want
+ found = found.concat(domUtils.getElements(options, context, false));
+ } else {
+ found = found.concat(domUtils.getElements(options, context));
}
- found = [];
- currentContext.forEach(function(context) {
- // htmlparsers document itself has no child property - only nodes do...
- if ( typeof context.children !== 'undefined' ) {
- found = found.concat(domUtils.getElementsByTagName(token, context.children));
- } else {
- found = found.concat(domUtils.getElementsByTagName(token, context));
- }
-
- });
-
- currentContext = found;
- }
- });
+ };
+
+ currentContext = found;
+ }
- } catch (error) {
- if (error !== StopIteration) { throw error; }
- }
+ // Star selector
+ else if ( tokens[i] === '*' ) {
+ // nothing to do right?
+ }
+
+ // Tag selector
+ else {
+ if (!tagRe.test(tokens[i])) {
+ currentContext = [];
+ break;
+ }
+
+ found = [];
+ for ( var m = 0; m < currentContext.length; m++ ) {
+ // htmlparsers document itself has no child property - only nodes do...
+ if ( typeof currentContext[m].children !== 'undefined' ) {
+ found = found.concat(domUtils.getElementsByTagName(tokens[i], currentContext[m].children));
+ } else {
+ found = found.concat(domUtils.getElementsByTagName(tokens[i], currentContext[m]));
+ }
+
+ };
+
+ currentContext = found;
+ }
+ };
return currentContext;
};

0 comments on commit 1b6c77b

Please sign in to comment.