Skip to content
This repository has been archived by the owner on Oct 20, 2018. It is now read-only.

Commit

Permalink
Browse files Browse the repository at this point in the history
Merge pull request #397 from sandroacoelho/fix-encoding
Fixing issues #9, #13
  • Loading branch information
sandroacoelho committed Feb 16, 2016
2 parents 8aa25ac + 11b3960 commit 42edda2
Showing 1 changed file with 9 additions and 4 deletions.
Expand Up @@ -5,8 +5,10 @@
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.dbpedia.spotlight.exceptions.InputException;
import org.xml.sax.InputSource;

import javax.ws.rs.core.Response;
import java.io.IOException;
import java.net.MalformedURLException;
import java.net.URL;

Expand Down Expand Up @@ -49,18 +51,21 @@ public static String getTextToProcess(String text, String inUrl) throws InputExc
textToProcess = text;
}else if (!inUrl.equals("")) {
LOG.info("Parsing URL to get main content");
URL url = null;
try {
url = new URL(inUrl);
URL url = new URL(inUrl);
InputSource is = new InputSource();
is.setEncoding("UTF-8");
is.setByteStream(url.openStream());
textToProcess = ArticleExtractor.INSTANCE.getText(url);
} catch (MalformedURLException e) {
// e.printStackTrace();
LOG.error("Input URL is not valid");
textToProcess = "";
} catch (BoilerpipeProcessingException e) {
e.printStackTrace();
LOG.error("Boilerpipe Cannot process the web page");
textToProcess = "";
} catch (IOException e) {
LOG.error("Input URL is not available");
textToProcess = "";
}

}else{
Expand Down

0 comments on commit 42edda2

Please sign in to comment.