Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add select methods returning element streams #2092

Open
wants to merge 7 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 36 additions & 5 deletions src/main/java/org/jsoup/nodes/Element.java
Original file line number Diff line number Diff line change
Expand Up @@ -486,6 +486,41 @@ public Elements select(Evaluator evaluator) {
return Selector.select(evaluator, this);
}

/**
 * Streams the elements that match the {@link Selector} CSS query, using this element as the starting context.
 * Matched elements may include this element, or any of its children.
 * <p>Because multiple filters can be combined in one query, this method is generally more powerful to use than
 * the DOM-type {@code getElementBy*} methods, e.g.:</p>
 * <ul>
 * <li>{@code el.selectStream("a[href]")} - finds links ({@code a} tags with {@code href} attributes)</li>
 * <li>{@code el.selectStream("a[href*=example.com]")} - finds links pointing to example.com (loosely)</li>
 * </ul>
 * <p>See the query syntax documentation in {@link org.jsoup.select.Selector}.</p>
 * <p>Also known as {@code querySelectorAll()} in the Web DOM.</p>
 *
 * @param cssQuery a {@link Selector} CSS-like query
 * @return a {@link Stream} containing elements that match the query (empty if none match)
 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
 * @see Selector selector query syntax
 * @see QueryParser#parse(String)
 * @since 1.18.1
 */
public Stream<Element> selectStream(String cssQuery) {
    // Parsing and matching are both handled by Selector; this element is the root of the search.
    final Stream<Element> matches = Selector.selectStream(cssQuery, this);
    return matches;
}

/**
 * Find elements that match the supplied Evaluator. This has the same functionality as
 * {@link #selectStream(String)}, but may be useful if you are running the same query many times (on many
 * documents) and want to save the overhead of repeatedly parsing the CSS query.
 *
 * @param evaluator an element evaluator; must not be null
 * @return a {@link Stream} containing elements that match the query (empty if none match)
 * @since 1.18.1
 */
public Stream<Element> selectStream(Evaluator evaluator) {
    return Selector.selectStream(evaluator, this);
}

/**
* Find the first Element that matches the {@link Selector} CSS query, with this element as the starting context.
* <p>This is effectively the same as calling {@code element.select(query).first()}, but is more efficient as query
Expand Down Expand Up @@ -1126,11 +1161,7 @@ public Elements getElementsByTag(String tagName) {
public @Nullable Element getElementById(String id) {
    Validate.notEmpty(id);

    // Take the first match straight from the stream rather than collecting every match into an
    // Elements list and then reading index 0; yields null when nothing matches.
    return selectStream(new Evaluator.Id(id)).findFirst().orElse(null);
}

/**
Expand Down
25 changes: 16 additions & 9 deletions src/main/java/org/jsoup/select/Collector.java
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@
import org.jsoup.nodes.Element;
import org.jspecify.annotations.Nullable;

import java.util.Optional;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
* Collects a list of elements that match the supplied criteria.
Expand All @@ -21,12 +21,22 @@ private Collector() {}
@param root root of tree to descend
@return list of matches; empty if none
*/
public static Elements collect (Evaluator eval, Element root) {
eval.reset();
public static Elements collect(Evaluator eval, Element root) {
return stream(eval, root).collect(Collectors.toCollection(Elements::new));
}

/**
* Obtain a stream of elements by visiting root and every descendant of root and testing it
* against the evaluator.
* @param evaluator Evaluator to test elements against
* @param root root of tree to descend
* @return A {@link Stream} of matches
*/
public static Stream<Element> stream(Evaluator evaluator, Element root) {
evaluator.reset();

return root.stream()
.filter(eval.asPredicate(root))
.collect(Collectors.toCollection(Elements::new));
.filter(evaluator.asPredicate(root));
}

/**
Expand All @@ -37,9 +47,6 @@ public static Elements collect (Evaluator eval, Element root) {
@return the first match; {@code null} if none
*/
public static @Nullable Element findFirst(Evaluator eval, Element root) {
    // stream() resets the evaluator before filtering, so no separate reset is needed here.
    return stream(eval, root).findFirst().orElse(null);
}
}
27 changes: 27 additions & 0 deletions src/main/java/org/jsoup/select/Selector.java
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

import java.util.Collection;
import java.util.IdentityHashMap;
import java.util.stream.Stream;

/**
* CSS-like element selector, that finds elements matching a query.
Expand Down Expand Up @@ -115,6 +116,32 @@ public static Elements select(Evaluator evaluator, Element root) {
return Collector.collect(evaluator, root);
}

/**
 * Stream the elements that match a CSS selector query.
 *
 * @param query CSS selector
 * @param root root element to descend into
 * @return matching elements, empty if none
 * @throws Selector.SelectorParseException (unchecked) on an invalid CSS query.
 */
public static Stream<Element> selectStream(String query, Element root) {
    Validate.notEmpty(query);
    // Parse once, then hand over to the Evaluator-based overload.
    final Evaluator parsed = QueryParser.parse(query);
    return selectStream(parsed, root);
}

/**
 * Find elements matching the supplied evaluator.
 *
 * @param evaluator the {@link Evaluator} to test elements against; must not be null
 * @param root root element to descend into; must not be null
 * @return a {@link Stream} of matching elements, empty if none
 */
public static Stream<Element> selectStream(Evaluator evaluator, Element root) {
    Validate.notNull(evaluator);
    Validate.notNull(root);
    return Collector.stream(evaluator, root);
}

/**
* Find elements matching selector.
*
Expand Down
11 changes: 11 additions & 0 deletions src/test/java/org/jsoup/nodes/ElementTest.java
Original file line number Diff line number Diff line change
Expand Up @@ -2981,4 +2981,15 @@ void prettySerializationRoundTrips(Document.OutputSettings settings) {
assertEquals("<p CLASS=\"YES\">One</p>", p.outerHtml());
assertEquals("CLASS=\"YES\"", attr.html());
}

@Test void testSelectStream() {
    Document doc = Jsoup.parse("<div>Hello world</div>");

    // Baseline via the list-returning API. Fail with a clear message instead of a NullPointerException
    // if nothing matched.
    Element fromSelect = doc.select("div").stream()
        .findFirst()
        .orElseThrow(() -> new AssertionError("select(\"div\") matched nothing"));
    assertEquals("Hello world", fromSelect.text());

    // The stream-returning API must find the same element.
    Element fromStream = doc.selectStream("div")
        .findFirst()
        .orElseThrow(() -> new AssertionError("selectStream(\"div\") matched nothing"));
    assertEquals("Hello world", fromStream.text());

    // Exactly one match for the div, and an empty stream when nothing matches.
    assertEquals(1, doc.selectStream("div").count());
    assertEquals(0, doc.selectStream("span").count());
}
}