Browse files

Added Element.textNodes() and Element.dataNodes(), to easily access a…

…n element's children text nodes and data nodes.
  • Loading branch information...
1 parent f31916f commit 7b9f17760049161b775fd23b15653961620e259d @jhy committed Aug 30, 2011
Showing with 89 additions and 0 deletions.
  1. +2 −0 CHANGES
  2. +46 −0 src/main/java/org/jsoup/nodes/Element.java
  3. +41 −0 src/test/java/org/jsoup/nodes/ElementTest.java
View
2 CHANGES
@@ -2,6 +2,8 @@ jsoup changelog
*** Release 1.6.2 [PENDING]
* Added jsoup.connect.cookies(Map) method, to set multiple cookies at once, possibly from a prior request.
+
+ * Added Element.textNodes() and Element.dataNodes(), to easily access an element's child text nodes and data nodes.
* Updated jsoup.connect so that when requests made as POSTs are redirected, the redirect is followed as a GET.
<https://github.com/jhy/jsoup/issues/120>
View
46 src/main/java/org/jsoup/nodes/Element.java
@@ -193,6 +193,48 @@ public Elements children() {
}
/**
+ * Get this element's child text nodes. The list is unmodifiable but the text nodes may be manipulated.
+ * <p/>
+ * This is effectively a filter on {@link #childNodes()} to get Text nodes. Only direct children are
+ * considered, so text nested inside a child element is not included. The list is built anew on each
+ * call, so a previously returned list does not reflect child nodes added or removed afterwards.
+ * <p/>
+ * For example, with the input HTML: {@code <p>One <span>Two</span> Three <br> Four</p>} with the {@code p} element selected:
+ * <ul>
+ * <li>{@code p.text()} = {@code "One Two Three Four"}</li>
+ * <li>{@code p.ownText()} = {@code "One Three Four"}</li>
+ * <li>{@code p.children()} = {@code Elements[<span>, <br>]}</li>
+ * <li>{@code p.childNodes()} = {@code List<Node>["One ", <span>, " Three ", <br>, " Four"]}</li>
+ * <li>{@code p.textNodes()} = {@code List<TextNode>["One ", " Three ", " Four"]}</li>
+ * </ul>
+ * @return child text nodes, in the order they occur among the child nodes. If this element has no
+ * text nodes, returns an empty list.
+ */
+ public List<TextNode> textNodes() {
+ List<TextNode> textNodes = new ArrayList<TextNode>();
+ for (Node node : childNodes) {
+ if (node instanceof TextNode)
+ textNodes.add((TextNode) node);
+ }
+ return Collections.unmodifiableList(textNodes);
+ }
+
+ /**
+ * Get this element's child data nodes. The list is unmodifiable but the data nodes may be manipulated.
+ * <p/>
+ * This is effectively a filter on {@link #childNodes()} to get Data nodes. Only direct children are
+ * considered. Data nodes hold e.g. the contents of a {@code script} or {@code style} element. The list
+ * is built anew on each call, so a previously returned list does not reflect later changes to the
+ * child nodes.
+ * @return child data nodes, in the order they occur among the child nodes. If this element has no
+ * data nodes, returns an empty list.
+ * @see #data()
+ */
+ public List<DataNode> dataNodes() {
+ List<DataNode> dataNodes = new ArrayList<DataNode>();
+ for (Node node : childNodes) {
+ if (node instanceof DataNode)
+ dataNodes.add((DataNode) node);
+ }
+ return Collections.unmodifiableList(dataNodes);
+ }
+
+ /**
* Find elements that match the {@link Selector} CSS query, with this element as the starting context. Matched elements
* may include this element, or any of its children.
* <p/>
@@ -740,6 +782,7 @@ public Elements getAllElements() {
*
* @return unencoded text, or empty string if none.
* @see #ownText()
+ * @see #textNodes()
*/
public String text() {
StringBuilder sb = new StringBuilder();
@@ -772,6 +815,7 @@ private void text(StringBuilder accum) {
*
* @return unencoded text, or empty string if none.
* @see #text()
+ * @see #textNodes()
*/
public String ownText() {
StringBuilder sb = new StringBuilder();
@@ -847,6 +891,8 @@ public boolean hasText() {
/**
* Get the combined data of this element. Data is e.g. the inside of a {@code script} tag.
* @return the data, or empty string if none
+ *
+ * @see #dataNodes()
*/
public String data() {
StringBuilder sb = new StringBuilder();
View
41 src/test/java/org/jsoup/nodes/ElementTest.java
@@ -486,5 +486,46 @@
assertTrue(doc.html().contains(doc.select("div").outerHtml()));
}
+ @Test public void testGetTextNodes() { // textNodes() yields only the direct child text nodes, whitespace intact
+ Document doc = Jsoup.parse("<p>One <span>Two</span> Three <br> Four</p>");
+ List<TextNode> textNodes = doc.select("p").first().textNodes();
+ assertEquals(3, textNodes.size()); // "Two" sits inside <span>, so it is not a child text node of <p>
+ assertEquals("One ", textNodes.get(0).text());
+ assertEquals(" Three ", textNodes.get(1).text());
+ assertEquals(" Four", textNodes.get(2).text());
+
+ assertEquals(0, doc.select("br").first().textNodes().size()); // no text children: an empty list, not null
+ }
+
+ @Test public void testManipulateTextNodes() { // the list is read-only, but edits to its nodes show up in the element
+ Document doc = Jsoup.parse("<p>One <span>Two</span> Three <br> Four</p>");
+ Element p = doc.select("p").first();
+ List<TextNode> textNodes = p.textNodes();
+
+ textNodes.get(1).text(" three-more "); // overwrite the text of the " Three " node
+ textNodes.get(2).splitText(3).text("-ur"); // split " Four" at offset 3, then rewrite the tail node as "-ur"
+
+ assertEquals("One Two three-more Fo-ur", p.text());
+ assertEquals("One three-more Fo-ur", p.ownText());
+ assertEquals(4, p.textNodes().size()); // grew because of split; a fresh call is needed, the earlier list still holds 3
+ }
+
+ @Test public void testGetDataNodes() { // script and style contents are data nodes; paragraph text is not
+ Document doc = Jsoup.parse("<script>One Two</script> <style>Three Four</style> <p>Fix Six</p>");
+ Element script = doc.select("script").first();
+ Element style = doc.select("style").first();
+ Element p = doc.select("p").first();
+
+ List<DataNode> scriptData = script.dataNodes();
+ assertEquals(1, scriptData.size()); // the whole script body is a single data node
+ assertEquals("One Two", scriptData.get(0).getWholeData());
+
+ List<DataNode> styleData = style.dataNodes();
+ assertEquals(1, styleData.size()); // likewise for the style body
+ assertEquals("Three Four", styleData.get(0).getWholeData());
+
+ List<DataNode> pData = p.dataNodes();
+ assertEquals(0, pData.size()); // <p> holds a text node, not a data node, so the list is empty
+ }
}

0 comments on commit 7b9f177

Please sign in to comment.