Skip to content

Commit

Permalink
OPENNLP-1476 Modernize DictionaryEntryPersistor to create XMLReader v…
Browse files Browse the repository at this point in the history
…ia javax.xml.parsers.SAXParserFactory (#512)
  • Loading branch information
mawiesne committed Mar 9, 2023
1 parent ccdf0c2 commit 8535da9
Showing 1 changed file with 13 additions and 9 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParserFactory;
import javax.xml.transform.OutputKeys;
import javax.xml.transform.Transformer;
import javax.xml.transform.TransformerConfigurationException;
Expand All @@ -37,7 +39,6 @@
import org.xml.sax.SAXException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.XMLReaderFactory;

import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.util.InvalidFormatException;
Expand All @@ -51,6 +52,9 @@
* @see Dictionary
*/
public class DictionaryEntryPersistor {

private static final SAXParserFactory SAX_PARSER_FACTORY = SAXParserFactory.newInstance();
private static final String SAX_FEATURE_NAMESPACES = "http://xml.org/sax/features/namespaces";

// TODO: should check for invalid format, make it safe
private static class DictionaryContenthandler implements ContentHandler {
Expand Down Expand Up @@ -199,14 +203,11 @@ public void startPrefixMapping(String prefix, String uri)
}
}

private static final String CHARSET = StandardCharsets.UTF_8.name();

private static final String DICTIONARY_ELEMENT = "dictionary";
private static final String ENTRY_ELEMENT = "entry";
private static final String TOKEN_ELEMENT = "token";
private static final String ATTRIBUTE_CASE_SENSITIVE = "case_sensitive";


/**
* Creates {@link Entry}s from the given {@link InputStream} and
* forwards these {@link Entry}s to the {@link EntryInserter}.
Expand All @@ -225,16 +226,19 @@ public void startPrefixMapping(String prefix, String uri)
public static boolean create(InputStream in, EntryInserter inserter)
throws IOException {

DictionaryContenthandler profileContentHandler =
new DictionaryContenthandler(inserter);
DictionaryContenthandler profileContentHandler = new DictionaryContenthandler(inserter);

XMLReader xmlReader;
try {
xmlReader = XMLReaderFactory.createXMLReader();
xmlReader = SAX_PARSER_FACTORY.newSAXParser().getXMLReader();
// Note:
// There is a compatibility difference here: the JAXP default for namespace processing is false,
// while the SAX 2 default is true. OpenNLP requires it to be enabled, so it is set explicitly.
xmlReader.setFeature(SAX_FEATURE_NAMESPACES, true);
xmlReader.setContentHandler(profileContentHandler);
xmlReader.parse(new InputSource(new UncloseableInputStream(in)));
}
catch (SAXException e) {
catch (ParserConfigurationException | SAXException e) {
throw new InvalidFormatException("The profile data stream has " +
"an invalid format!", e);
}
Expand Down Expand Up @@ -290,7 +294,7 @@ public static void serialize(OutputStream out, Iterator<Entry> entries,
}

Transformer serializer = hd.getTransformer();
serializer.setOutputProperty(OutputKeys.ENCODING, CHARSET);
serializer.setOutputProperty(OutputKeys.ENCODING, StandardCharsets.UTF_8.name());
serializer.setOutputProperty(OutputKeys.INDENT, "yes");

hd.setResult(streamResult);
Expand Down

0 comments on commit 8535da9

Please sign in to comment.