Skip to content

Commit

Permalink
Improved multipage image extraction
Browse files Browse the repository at this point in the history
  • Loading branch information
dafi committed Feb 14, 2016
1 parent ac63b09 commit c84282b
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 2 deletions.
2 changes: 1 addition & 1 deletion app/src/main/assets/domSelectors.json
Expand Up @@ -86,7 +86,7 @@
"hotflick.net": {
"image": ".pict_content img",
"container": ".box-item-155-img img",
- "multiPage": ".box-paging a:not(.active)"
+ "multiPage": ".box-paging a.active ~ a"
},
"carreck.com": {
"title": ".postentry p"
Expand Down
Expand Up @@ -298,12 +298,14 @@ private void extractImageFromMultiPage(List<ImageInfo> imageInfoList, DOMSelecto
if (selector.getMultiPage() == null) {
return;
}
- for (Element element : startPageDocument.select(selector.getMultiPage())) {
+ Element element = startPageDocument.select(selector.getMultiPage()).first();
+ while (element != null) {
String pageUrl = element.absUrl("href");
String pageContent = readURLContent(pageUrl);
Document pageDocument = Jsoup.parse(pageContent);
pageDocument.setBaseUri(pageUrl);
extractImages(imageInfoList, domSelectorFinder.getSelectorFromUrl(pageUrl), pageDocument);
+ element = pageDocument.select(selector.getMultiPage()).first();
}
}

Expand Down

0 comments on commit c84282b

Please sign in to comment.