This repository has been archived by the owner on Dec 12, 2018. It is now read-only.

Code cleanup and shortest fix ever that addresses #14 and #22.
dmnapolitano committed Jul 7, 2014
1 parent 541befb commit c780da3
Showing 4 changed files with 20 additions and 67 deletions.
24 changes: 6 additions & 18 deletions src/StanfordCoreNLPHandler.java
@@ -47,40 +47,29 @@ public class StanfordCoreNLPHandler implements StanfordCoreNLP.Iface
private StanfordTaggerThrift tagger;
private StanfordTokenizerThrift tokenizer;

// TODO: This NEEDS to be able to accept paths to alternate models other than just the Parser.
/* public StanfordCoreNLPHandler(String parserModelFilePath)
{
System.err.println("Initializing Parser...");
parser = new StanfordParserThrift(parserModelFilePath);
System.err.println("Initializing Named Entity Recognizer...");
ner = new StanfordNERThrift();
System.err.println("Initializing Coreference Resolver...");
coref = new StanfordCorefThrift();
System.err.println("Initializing Tregex...");
tregex = new StanfordTregexThrift();
System.err.println("Initializing Tagger...");
tagger = new StanfordTaggerThrift();
System.err.println("Initializing Tokenizer...");
tokenizer = new StanfordTokenizerThrift();
}
*/

public StanfordCoreNLPHandler(String configFilePath) throws Exception
{
try
{
System.err.println("Reading in configuration from " + configFilePath + "...");
CoreNLPThriftConfig config = new CoreNLPThriftConfig(configFilePath);

System.err.println("Initializing Parser...");
parser = new StanfordParserThrift(config.getParserModel());

System.err.println("Initializing Named Entity Recognizer...");
ner = new StanfordNERThrift(config.getNERModels());

System.err.println("Initializing Coreference Resolver...");
coref = new StanfordCorefThrift();

System.err.println("Initializing Tregex...");
tregex = new StanfordTregexThrift();

System.err.println("Initializing Tagger...");
tagger = new StanfordTaggerThrift(config.getTaggerModel());

System.err.println("Initializing Tokenizer...");
tokenizer = new StanfordTokenizerThrift();
}
@@ -131,7 +120,6 @@ public String lexicalize_parse_tree(String tree)
/* Begin Stanford NER methods */
public List<NamedEntity> get_entities_from_text(String text) throws TApplicationException
{
//return ner.getNamedEntitiesFromText(text);
List<ParseTree> parseTreeObjects = parser.parse_text(text, null);
List<String> parseTrees = CoreNLPThriftUtil.ParseTreeObjectsToString(parseTreeObjects);
return ner.getNamedEntitiesFromTrees(parseTrees);
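For context, get_entities_from_text no longer runs NER directly on raw text: the text is parsed first, with null selecting the parser's default output options, and NER is then applied to the resulting trees. A minimal sketch of that flow, assuming the repository's StanfordParserThrift, StanfordNERThrift, ParseTree, and NamedEntity types and an assumed Thrift-generated getTree() accessor; this helper class is hypothetical and is not the repository's CoreNLPThriftUtil:

// Hypothetical helper, not part of the repository: illustrates the new
// text -> parse trees -> tree strings -> named entities flow used above.
import java.util.ArrayList;
import java.util.List;
import org.apache.thrift.TApplicationException;

public class EntityFlowSketch
{
    public static List<NamedEntity> entitiesViaTrees(StanfordParserThrift parser,
                                                     StanfordNERThrift ner,
                                                     String text) throws TApplicationException
    {
        // null output options fall back to the parser's defaults; this is only safe
        // because of the null check fixed in StanfordParserThrift below.
        List<ParseTree> treeObjects = parser.parse_text(text, null);

        List<String> treeStrings = new ArrayList<String>();
        for (ParseTree t : treeObjects)
        {
            treeStrings.add(t.getTree());  // assumed Thrift-generated getter for the tree string
        }
        return ner.getNamedEntitiesFromTrees(treeStrings);
    }
}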
12 changes: 0 additions & 12 deletions src/ner/StanfordNERThrift.java
@@ -44,7 +44,6 @@ public class StanfordNERThrift
* trees.
*/

// private StanfordCoreNLP pipeline;
private NERCombinerAnnotator ner;

public StanfordNERThrift(List<String> nerModels)
@@ -62,17 +61,6 @@ public StanfordNERThrift(List<String> nerModels)
}
}


// public List<NamedEntity> getNamedEntitiesFromText(String text)
// {
// Annotation annotation = new Annotation(text);
// pipeline.annotate(annotation);

// List<CoreMap> sentences = CoreNLPThriftUtil.adjustCharacterOffsets(annotation.get(CoreAnnotations.SentencesAnnotation.class), false);
// return toNamedEntityObjects(sentences);
// }


public List<NamedEntity> getNamedEntitiesFromTrees(List<String> parseTrees)
{
Annotation annotation = CoreNLPThriftUtil.getAnnotationFromParseTrees(parseTrees);
51 changes: 14 additions & 37 deletions src/parser/StanfordParserThrift.java
@@ -57,7 +57,6 @@ public StanfordParserThrift(String modelFile)
{
loadModel(modelFile);
tlp = new PennTreebankLanguagePack();
// treePrinter = new TreePrint("oneline", "", tlp);
}

private String TreeObjectToString(Tree tree)
@@ -80,11 +79,11 @@ private void loadModel(String modelFile)

private void setOptions(List<String> outputOptions) throws Exception
{
String outputFormatStr = "oneline";
String outputFormatStr = "oneline"; // default
String outputFormatOptionsStr = "";

// for output formatting
if (outputOptions.size() > 0 || outputOptions != null)
if (outputOptions != null && outputOptions.size() > 0)
{
int ofIndex = outputOptions.indexOf("-outputFormat");
int ofoIndex = outputOptions.indexOf("-outputFormatOptions");
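Note: the operand reordering above is the "shortest fix ever" from the commit message. With ||, outputOptions.size() is evaluated before the null check, so passing null (as get_entities_from_text now does via parse_text(text, null)) throws a NullPointerException, and for any non-null list the old condition was always true, even when the list was empty. A standalone sketch of the difference, with hypothetical names:

// Standalone illustration of the short-circuit fix; not part of the repository.
import java.util.Collections;
import java.util.List;

public class ShortCircuitDemo
{
    static boolean hasOptionsBuggy(List<String> opts)
    {
        // Old order: opts.size() runs first, so opts == null throws
        // NullPointerException, and a non-null empty list still yields true.
        return opts.size() > 0 || opts != null;
    }

    static boolean hasOptionsFixed(List<String> opts)
    {
        // && short-circuits: opts.size() only runs when opts is non-null.
        return opts != null && opts.size() > 0;
    }

    public static void main(String[] args)
    {
        System.out.println(hasOptionsFixed(null));                           // false
        System.out.println(hasOptionsFixed(Collections.<String>emptyList())); // false
        // hasOptionsBuggy(null) would throw NullPointerException here.
    }
}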
@@ -114,6 +113,12 @@ private void setOptions(List<String> outputOptions) throws Exception
// customParserOptionsSet = true;
// }
}

//public List<ParseTree> parse_text(String text) throws TApplicationException
//{
// List<String> outputFormat = Arrays.asList(DEFAULTOPTIONS);
// return parse_text(text, outputFormat);
//}

public List<ParseTree> parse_text(String text, List<String> outputFormat) throws TApplicationException
{
@@ -140,42 +145,14 @@ public List<ParseTree> parse_text(String text, List<String> outputFormat) throws TApplicationException

return results;
}


/**
* @param tokens One sentence worth of tokens at a time.
* @return A ParseTree object of the String representation of the tree, plus its probability.
* @throws TApplicationException
*/
public ParseTree parse_tokens(List<String> tokens, List<String> outputFormat) throws TApplicationException
{
// List<ParseTree> results = new ArrayList<ParseTree>();

// assume an array of tokens was passed in
// This doesn't seem to be getting used much; the typical case is to pass in one sentence worth of tokens.
// This code here handled the case where you wanted two parse trees, one for each sentence "This is a sentence. It is about cats."
// and you wanted to pass in [["This", "is", "a", "sentence", "."], ["It", "is", "about", "cats", "."]]
// but instead this code is looking for ["This", "is", "a", "sentence", ".", "\n", "It", "is", "about", "cats", "."]
/*if (tokens.contains("\n"))
{
StringBuilder builder = new StringBuilder();
// at least one sentence worth of tokens
for(String token : tokens)
{
builder.append(token+" ");
}
String[] multipleSentences = builder.toString().split("\n");
for (String s : multipleSentences)
{
try
{
List<CoreLabel> crazyStanfordFormat = Sentence.toCoreLabelList(s.trim().split(" "));
Tree parseTree = parser.apply(crazyStanfordFormat);
treePrinter.printTree(parseTree, pw);
results.add(new ParseTree(sw.getBuffer().toString().trim(), parseTree.score()));
}
catch (Exception e)
{
throw new TApplicationException(TApplicationException.INTERNAL_ERROR, e.getMessage());
}
}
}
else
{*/
try
{
setOptions(outputFormat);
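Per the Javadoc and the removed comment block above, parse_tokens expects one sentence's worth of tokens per call rather than several sentences joined with "\n". A hedged usage sketch, with a hypothetical class and variable names, assuming that a null output-format list falls back to the default "oneline" format as in setOptions above and that ParseTree exposes a Thrift-generated getTree() getter:

// Hypothetical usage, not from the repository: one parse_tokens call per sentence.
import java.util.Arrays;
import java.util.List;
import org.apache.thrift.TApplicationException;

public class ParseTokensUsage
{
    public static void parseTwoSentences(StanfordParserThrift parser) throws TApplicationException
    {
        List<String> first  = Arrays.asList("This", "is", "a", "sentence", ".");
        List<String> second = Arrays.asList("It", "is", "about", "cats", ".");

        // With the null handling fixed above, null means "use the default output format".
        ParseTree tree1 = parser.parse_tokens(first, null);
        ParseTree tree2 = parser.parse_tokens(second, null);

        System.out.println(tree1.getTree());  // assumed Thrift-generated getter
        System.out.println(tree2.getTree());
    }
}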
Binary file modified stanford-corenlp-wrapper.jar
