Permalink
Browse files

Upgraded the selector query parser to allow nested selectors like 'div:has(p:has(span))'
  • Loading branch information...
jhy committed Jun 22, 2010
1 parent 97d203b commit cf3676c534e92052acf870c71e2fa8fb9c060fe6
View
@@ -1,5 +1,8 @@
jsoup changelog
+*** Release 1.2.2 [pending]
+ * Upgraded the selector query parser to allow nested selectors like 'div:has(p:has(span))'
+
*** Release 1.2.1 [2010-Jun-21]
* Added .before(html) and .after(html) methods to Element and Elements, to insert sibling HTML
@@ -188,6 +188,32 @@ public String chompTo(String seq) {
return data;
}
+ /**
+ * Pulls a balanced string off the queue. E.g. if the queue is "(one (two) three) four", calling this with
+ * open '(' and close ')' will return "one (two) three", and leave " four" on the queue.
+ * <p>
+ * The outermost opener and its matching closer are consumed but are not part of the returned string; nested
+ * pairs inside them are returned as-is. If the queue runs out before the outer pair is closed, everything
+ * consumed after the first opener is returned (as much as possible is matched).
+ * <p>
+ * Note: if the first character on the queue is not the opener, the nesting depth never rises above zero, so
+ * that single character is consumed and an empty string is returned. An empty queue also yields an empty string.
+ * @param open opener character that increases the nesting depth
+ * @param close closer character that decreases the nesting depth
+ * @return data matched from the queue, without the outer open / close pair
+ */
+ public String chompBalanced(Character open, Character close) {
+ StringBuilder accum = new StringBuilder();
+ int depth = 0; // current nesting level: +1 per opener seen, -1 per closer seen
+ int i = 0; // number of characters consumed so far; 0 means we are on the first (outer opener) character
+ do {
+ if (queue.isEmpty()) break; // unbalanced or empty input: stop and return what has been gathered
+ Character c = consume();
+ if (c.equals(open))
+ depth++;
+ else if (c.equals(close))
+ depth--;
+
+ if (depth > 0 && i > 0) // i > 0 skips the outer opener; depth > 0 skips the closer that balances it
+ accum.append(c); // don't include the outer match pair in the return
+ i++;
+ } while (depth > 0); // depth back at 0 (or below) means the outer pair has been closed
+ return accum.toString();
+ }
+
/**
* Pulls the next run of whitespace characters off the queue.
*/
@@ -159,12 +159,12 @@ private Elements findElements() {
return indexGreaterThan();
} else if (tq.matchChomp(":eq(")) {
return indexEquals();
- } else if (tq.matchChomp(":has(")) {
+ } else if (tq.matches(":has(")) {
return has();
} else if (tq.matchChomp(":contains(")) {
return contains();
} else { // unhandled
- throw new SelectorParseException("Could not parse query " + query);
+ throw new SelectorParseException("Could not parse query '%s': unexpected token at '%s'", query, tq.remainder());
}
}
@@ -224,7 +224,7 @@ else if (tq.matchChomp("*="))
return root.getElementsByAttributeValueContaining(key, tq.chompTo("]"));
else
- throw new SelectorParseException("Could not parse attribute query " + query);
+ throw new SelectorParseException("Could not parse attribute query '%s': unexpected token at '%s'", query, tq.remainder());
}
}
@@ -253,13 +253,15 @@ private int consumeIndex() {
// pseudo selector :has(el) -- el is itself a selector query and may contain nested parentheses, e.g. div:has(p:has(span))
private Elements has() {
- String subQuery = tq.chompTo(")");
+ tq.consume(":has"); // drop the ":has" prefix only; the "(" is left queued so it can act as the outer opener below
+ String subQuery = tq.chompBalanced('(',')'); // text between the outer parens; inner ( ) pairs stay intact for the recursive select
Validate.notEmpty(subQuery, ":has(el) subselect must not be empty");
return filterForParentsOfDescendants(elements, select(subQuery, elements)); // NOTE(review): presumably keeps those current elements that contain a sub-query match -- confirm against the helper
}
// pseudo selector :contains(text)
+ // todo: allow an escaped ')' in the search text; probably do a balanced match, for the convenience of the caller
private Elements contains() {
String searchText = tq.chompTo(")");
Validate.notEmpty(searchText, ":contains(text) query must not be empty");
@@ -359,8 +361,8 @@ private static Elements filterForSelf(Collection<Element> parents, Collection<El
}
// Unchecked exception raised by the selector parser when a query (or part of one) cannot be parsed.
public static class SelectorParseException extends IllegalStateException {
- public SelectorParseException(String s) {
- super(s);
+ public SelectorParseException(String msg, Object... params) { // msg is a String.format pattern, params are its arguments
+ super(String.format(msg, params)); // message is formatted eagerly; a literal '%' in msg must therefore be written as "%%"
}
}
}
@@ -0,0 +1,27 @@
+package org.jsoup.parser;
+
+import org.junit.Test;
+import static org.junit.Assert.*;
+
+/**
+ * Token queue tests.
+ */
+public class TokenQueueTest {
+ @Test public void chompBalanced() {
+ TokenQueue tq = new TokenQueue(":contains(one (two) three) four");
+ String pre = tq.consumeTo("(");
+ String guts = tq.chompBalanced('(', ')');
+ String remainder = tq.remainder();
+
+ assertEquals(":contains", pre);
+ assertEquals("one (two) three", guts);
+ assertEquals(" four", remainder);
+ }
+
+ @Test public void chompBalancedMatchesAsMuchAsPossible() {
+ TokenQueue tq = new TokenQueue("unbalanced(something(or another");
+ tq.consumeTo("(");
+ String match = tq.chompBalanced('(', ')');
+ assertEquals("something(or another", match);
+ }
+}
@@ -370,6 +370,13 @@
assertEquals("1", divs3.get(1).id());
assertEquals("2", divs3.get(2).id());
}
+
+ @Test public void testNestedHas() {
+ Document doc = Jsoup.parse("<div><p><span>One</span></p></div> <div><p>Two</p></div>"); // only the first div has a p that contains a span
+ Elements divs = doc.select("div:has(p:has(span))"); // nested :has() -- requires the balanced-paren sub-query parsing
+ assertEquals(1, divs.size());
+ assertEquals("One", divs.first().text());
+ }
@Test public void testPseudoContains() {
Document doc = Jsoup.parse("<div><p>The Rain.</p> <p class=light>The <i>rain</i>.</p> <p>Rain, the.</p></div>");

0 comments on commit cf3676c

Please sign in to comment.