Skip to content

Commit

Permalink
update example, add example classes
Browse files Browse the repository at this point in the history
  • Loading branch information
syjer committed Jan 6, 2020
1 parent 47f157f commit 82aed48
Show file tree
Hide file tree
Showing 4 changed files with 92 additions and 21 deletions.
47 changes: 26 additions & 21 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ maven:
<dependency>
<groupId>ch.digitalfondue.jfiveparse</groupId>
<artifactId>jfiveparse</artifactId>
<version>0.7.0</version>
<version>0.7.1</version>
</dependency>
```

Expand All @@ -48,27 +48,31 @@ compile 'ch.digitalfondue.jfiveparse:jfiveparse:0.7.0'
## Use:

```java
import ch.digitalfondue.jfiveparse.*;
import ch.digitalfondue.jfiveparse.Document;
import ch.digitalfondue.jfiveparse.JFiveParse;
import ch.digitalfondue.jfiveparse.Node;

public class MyTest {
import java.io.StringReader;
import java.util.List;

public class Example {

public static void main(String[] args) {
// directly from String
Parser p = new Parser();
Document doc = p.parse("<html><body>Hello world!</body></html>");
System.out.println(HtmlSerializer.serialize(doc));
Document doc = JFiveParse.parse("<html><body>Hello world!</body></html>");
System.out.println(JFiveParse.serialize(doc));

// from reader
Document doc2 = p.parse(new StringReader("<html><body>Hello world!</body></html>"));
System.out.println(HtmlSerializer.serialize(doc2));
Document doc2 = JFiveParse.parse(new StringReader("<html><body>Hello world!</body></html>"));
System.out.println(JFiveParse.serialize(doc2));

// parse fragment
List<Node> fragment = p.parseFragment(new Element("div"), "<p><span>Hello world</span></p>");
System.out.println(HtmlSerializer.serialize(fragment.get(0)));
List<Node> fragment = JFiveParse.parseFragment("<p><span>Hello world</span></p>");
System.out.println(JFiveParse.serialize(fragment.get(0)));

// parse fragment from reader
List<Node> fragment2 = p.parseFragment(new Element("div"), new StringReader("<p><span>Hello world</span></p>"));
System.out.println(HtmlSerializer.serialize(fragment2.get(0)));
List<Node> fragment2 = JFiveParse.parseFragment(new StringReader("<p><span>Hello world</span></p>"));
System.out.println(JFiveParse.serialize(fragment2.get(0)));
}
}
```
Expand All @@ -84,12 +88,14 @@ It will print:

## Examples:

See directory: https://github.com/digitalfondue/jfiveparse/tree/master/src/test/java/ch/digitalfondue/jfiveparse/example

### Fetch all titles+links on the front page of HN

```java
import ch.digitalfondue.jfiveparse.Element;
import ch.digitalfondue.jfiveparse.JFiveParse;
import ch.digitalfondue.jfiveparse.NodeMatcher;
import ch.digitalfondue.jfiveparse.Parser;
import ch.digitalfondue.jfiveparse.Selector;

import java.io.IOException;
Expand All @@ -101,14 +107,13 @@ import java.nio.charset.StandardCharsets;
public class LoadHNTitle {

public static void main(String[] args) throws IOException {
Parser p = new Parser();
Reader reader = new InputStreamReader(new URL("https://news.ycombinator.com/").openStream(), StandardCharsets.UTF_8);
// select td.title > a
NodeMatcher matcher = Selector.select().element("td").hasClass("title").withChild().element("a").toMatcher();
p.parse(reader).getAllNodesMatching(matcher).stream()
.map(Element.class::cast)
.filter(a -> !"nofollow".equals(a.getAttribute("rel"))) //remove some extraneous a elements
.forEach(a -> System.out.println(a.getTextContent() + " [" + a.getAttribute("href") + "]"));
try (Reader reader = new InputStreamReader(new URL("https://news.ycombinator.com/").openStream(), StandardCharsets.UTF_8)) {
// select td.title > a.storylink
NodeMatcher matcher = Selector.select().element("td").hasClass("title").withChild().element("a").hasClass("storylink").toMatcher();
JFiveParse.parse(reader).getAllNodesMatching(matcher).stream()
.map(Element.class::cast)
.forEach(a -> System.out.println(a.getTextContent() + " [" + a.getAttribute("href") + "]"));
}
}
}
```
Expand Down
12 changes: 12 additions & 0 deletions src/main/java/ch/digitalfondue/jfiveparse/JFiveParse.java
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,18 @@ public static List<Node> parseFragment(String input) {
return parseFragment(input, EnumSet.noneOf(Option.class));
}

/**
 * Parse a html fragment read from the given reader, without any option enabled.
 * Delegates to {@code parseFragment(Reader, Set)}, which uses a "div" element
 * as the parent node.
 *
 * @param input the reader providing the html fragment
 * @return the nodes produced by parsing the fragment
 */
public static List<Node> parseFragment(Reader input) {
    EnumSet<Option> noOptions = EnumSet.noneOf(Option.class);
    return parseFragment(input, noOptions);
}

/**
 * Parse a html fragment read from the given reader, with a "div" element
 * (in the html namespace) as a parent node.
 *
 * @param input   the reader providing the html fragment
 * @param options the options handed over to the parser
 * @return the nodes produced by parsing the fragment
 */
public static List<Node> parseFragment(Reader input, Set<Option> options) {
    Element divParent = new Element("div", Node.NAMESPACE_HTML);
    return parseFragment(divParent, input, options);
}

/**
 * Parse a html fragment read from the given reader, using the supplied
 * element as the parent node. A new {@code Parser} is created with the
 * given options for each call.
 *
 * @param parent  the element used as parent node of the fragment
 * @param input   the reader providing the html fragment
 * @param options the options the parser is constructed with
 * @return the nodes returned by {@code Parser#parseFragment}
 */
public static List<Node> parseFragment(Element parent, Reader input, Set<Option> options) {
    Parser fragmentParser = new Parser(options);
    return fragmentParser.parseFragment(parent, input);
}

/**
* Parse a html fragment, with a "div" element as a parent node.
*
Expand Down
29 changes: 29 additions & 0 deletions src/test/java/ch/digitalfondue/jfiveparse/example/Example.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
package ch.digitalfondue.jfiveparse.example;

import ch.digitalfondue.jfiveparse.Document;
import ch.digitalfondue.jfiveparse.JFiveParse;
import ch.digitalfondue.jfiveparse.Node;

import java.io.StringReader;
import java.util.List;

/**
 * Minimal usage example: parses a full document and a fragment, each once
 * from a String and once from a Reader, and prints the serialized result
 * to standard output.
 */
public class Example {

    // markup of the full document parsed by the first two examples
    private static final String PAGE_HTML = "<html><body>Hello world!</body></html>";

    // markup of the fragment parsed by the last two examples
    private static final String FRAGMENT_HTML = "<p><span>Hello world</span></p>";

    public static void main(String[] args) {
        // whole document, input given as a String
        Document fromString = JFiveParse.parse(PAGE_HTML);
        String serializedFromString = JFiveParse.serialize(fromString);
        System.out.println(serializedFromString);

        // whole document, input given as a Reader
        Document fromReader = JFiveParse.parse(new StringReader(PAGE_HTML));
        String serializedFromReader = JFiveParse.serialize(fromReader);
        System.out.println(serializedFromReader);

        // fragment, input given as a String; only the first node is printed
        List<Node> nodesFromString = JFiveParse.parseFragment(FRAGMENT_HTML);
        Node firstFromString = nodesFromString.get(0);
        System.out.println(JFiveParse.serialize(firstFromString));

        // fragment, input given as a Reader; only the first node is printed
        List<Node> nodesFromReader = JFiveParse.parseFragment(new StringReader(FRAGMENT_HTML));
        Node firstFromReader = nodesFromReader.get(0);
        System.out.println(JFiveParse.serialize(firstFromReader));
    }
}
25 changes: 25 additions & 0 deletions src/test/java/ch/digitalfondue/jfiveparse/example/LoadHNTitle.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
package ch.digitalfondue.jfiveparse.example;

import ch.digitalfondue.jfiveparse.Element;
import ch.digitalfondue.jfiveparse.JFiveParse;
import ch.digitalfondue.jfiveparse.NodeMatcher;
import ch.digitalfondue.jfiveparse.Selector;

import java.io.IOException;
import java.io.InputStreamReader;
import java.io.Reader;
import java.net.URL;
import java.nio.charset.StandardCharsets;

/**
 * Example: downloads https://news.ycombinator.com/ and prints, for every
 * element matching {@code td.title > a.storylink}, its text content followed
 * by its href attribute in square brackets.
 */
public class LoadHNTitle {

    public static void main(String[] args) throws IOException {
        // matcher for: td.title > a.storylink
        NodeMatcher storyLinks = Selector.select()
                .element("td").hasClass("title")
                .withChild()
                .element("a").hasClass("storylink")
                .toMatcher();

        URL frontPage = new URL("https://news.ycombinator.com/");
        // the reader (and the underlying stream) is closed when the try block exits
        try (Reader reader = new InputStreamReader(frontPage.openStream(), StandardCharsets.UTF_8)) {
            JFiveParse.parse(reader)
                    .getAllNodesMatching(storyLinks)
                    .stream()
                    .map(Element.class::cast)
                    .map(LoadHNTitle::describe)
                    .forEach(System.out::println);
        }
    }

    // Formats a link element as "<text content> [<href attribute>]".
    private static String describe(Element link) {
        return link.getTextContent() + " [" + link.getAttribute("href") + "]";
    }
}

0 comments on commit 82aed48

Please sign in to comment.