diff --git a/rest/src/main/java/org/dbpedia/spotlight/web/rest/ServerUtils.java b/rest/src/main/java/org/dbpedia/spotlight/web/rest/ServerUtils.java index 8435d6afb..80f7aff44 100644 --- a/rest/src/main/java/org/dbpedia/spotlight/web/rest/ServerUtils.java +++ b/rest/src/main/java/org/dbpedia/spotlight/web/rest/ServerUtils.java @@ -5,8 +5,10 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.dbpedia.spotlight.exceptions.InputException; +import org.xml.sax.InputSource; import javax.ws.rs.core.Response; +import java.io.IOException; import java.net.MalformedURLException; import java.net.URL; @@ -49,18 +51,21 @@ public static String getTextToProcess(String text, String inUrl) throws InputExc textToProcess = text; }else if (!inUrl.equals("")) { LOG.info("Parsing URL to get main content"); - URL url = null; try { - url = new URL(inUrl); + URL url = new URL(inUrl); + InputSource is = new InputSource(); + is.setEncoding("UTF-8"); + is.setByteStream(url.openStream()); textToProcess = ArticleExtractor.INSTANCE.getText(url); } catch (MalformedURLException e) { - // e.printStackTrace(); LOG.error("Input URL is not valid"); textToProcess = ""; } catch (BoilerpipeProcessingException e) { - e.printStackTrace(); LOG.error("Boilerpipe Cannot process the web page"); textToProcess = ""; + } catch (IOException e) { + LOG.error("Input URL is not available"); + textToProcess = ""; } }else{