Skip to content

Commit

Permalink
use smarter algorithm to find PDF iframe, fixes gh-6
Browse files Browse the repository at this point in the history
  • Loading branch information
dnet committed Feb 12, 2014
1 parent 41fbc9f commit c15dd5a
Showing 1 changed file with 11 additions and 1 deletion.
12 changes: 11 additions & 1 deletion src/hu/vsza/adsapi/Part.java
Expand Up @@ -4,6 +4,7 @@
import java.net.URL;
import java.net.URLConnection;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
Expand Down Expand Up @@ -34,11 +35,20 @@ public URLConnection getPdfConnection() throws IOException {
String viewPageUrl = viewPageLink.absUrl("href");

doc = Jsoup.connect(viewPageUrl).referrer(href).userAgent(UA).header("Accept-Language", "en").get();
Element pdfIframe = doc.select("td iframe").get(0);
Element pdfIframe = getDatasheetIframe(doc);
String pdfUrl = pdfIframe.absUrl("src");

URLConnection pdfConnection = new URL(pdfUrl).openConnection();
pdfConnection.setRequestProperty("Referer", viewPageUrl);
return pdfConnection;
}

/**
 * Picks the iframe that most likely embeds the datasheet PDF.
 *
 * <p>The first iframe whose {@code src} is a non-blank, site-relative URL
 * (neither starting with an {@code http}/{@code https} scheme nor
 * protocol-relative {@code //}) is returned. Iframes without a usable
 * {@code src} are skipped, because they cannot point at a PDF. If no
 * relative iframe exists, the first iframe of the document is returned
 * as a fallback.
 *
 * @param doc the element (typically the whole view page) to search in
 * @return the iframe chosen as the datasheet frame
 * @throws IndexOutOfBoundsException if {@code doc} contains no iframe at all
 */
protected static Element getDatasheetIframe(Element doc) {
    List<Element> iframes = doc.select("iframe");
    if (iframes.isEmpty()) {
        // Same exception type the unguarded get(0) would raise, but with context.
        throw new IndexOutOfBoundsException("no iframe element found while looking for the datasheet PDF");
    }
    for (Element iframe : iframes) {
        String src = iframe.attr("src").trim();
        if (src.isEmpty()) {
            // A missing src attribute yields "", which must not count as a relative URL.
            continue;
        }
        // URL schemes are case-insensitive, so compare the prefix ignoring case.
        boolean absolute = src.regionMatches(true, 0, "http", 0, 4) || src.startsWith("//");
        if (!absolute) {
            return iframe;
        }
    }
    // No relative iframe found: keep the historical behaviour of using the first one.
    return iframes.get(0);
}
}

0 comments on commit c15dd5a

Please sign in to comment.