This repository has been archived by the owner on Dec 12, 2018. It is now read-only.

Code cleanup and shortest fix ever that addresses #14 and #22.
dmnapolitano committed Jul 7, 2014
1 parent 541befb commit c780da3
Showing 4 changed files with 20 additions and 67 deletions.
24 changes: 6 additions & 18 deletions src/StanfordCoreNLPHandler.java
@@ -47,40 +47,29 @@ public class StanfordCoreNLPHandler implements StanfordCoreNLP.Iface
private StanfordTaggerThrift tagger;
private StanfordTokenizerThrift tokenizer;

// TODO: This NEEDS to be able to accept paths to alternate models other than just the Parser.
/* public StanfordCoreNLPHandler(String parserModelFilePath)
{
System.err.println("Initializing Parser...");
parser = new StanfordParserThrift(parserModelFilePath);
System.err.println("Initializing Named Entity Recognizer...");
ner = new StanfordNERThrift();
System.err.println("Initializing Coreference Resolver...");
coref = new StanfordCorefThrift();
System.err.println("Initializing Tregex...");
tregex = new StanfordTregexThrift();
System.err.println("Initializing Tagger...");
tagger = new StanfordTaggerThrift();
System.err.println("Initializing Tokenizer...");
tokenizer = new StanfordTokenizerThrift();
}
*/

public StanfordCoreNLPHandler(String configFilePath) throws Exception
{
try
{
System.err.println("Reading in configuration from " + configFilePath + "...");
CoreNLPThriftConfig config = new CoreNLPThriftConfig(configFilePath);

System.err.println("Initializing Parser...");
parser = new StanfordParserThrift(config.getParserModel());

System.err.println("Initializing Named Entity Recognizer...");
ner = new StanfordNERThrift(config.getNERModels());

System.err.println("Initializing Coreference Resolver...");
coref = new StanfordCorefThrift();

System.err.println("Initializing Tregex...");
tregex = new StanfordTregexThrift();

System.err.println("Initializing Tagger...");
tagger = new StanfordTaggerThrift(config.getTaggerModel());

System.err.println("Initializing Tokenizer...");
tokenizer = new StanfordTokenizerThrift();
}
@@ -131,7 +120,6 @@ public String lexicalize_parse_tree(String tree)
/* Begin Stanford NER methods */
public List<NamedEntity> get_entities_from_text(String text) throws TApplicationException
{
//return ner.getNamedEntitiesFromText(text);
List<ParseTree> parseTreeObjects = parser.parse_text(text, null);
List<String> parseTrees = CoreNLPThriftUtil.ParseTreeObjectsToString(parseTreeObjects);
return ner.getNamedEntitiesFromTrees(parseTrees);
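For context, get_entities_from_text no longer runs NER directly on raw text: the text is parsed first, with null selecting the parser's default output options, and NER is then applied to the resulting trees. A minimal sketch of that flow, assuming the repository's StanfordParserThrift, StanfordNERThrift, ParseTree, and NamedEntity types and an assumed Thrift-generated getTree() accessor; this helper class is hypothetical and is not the repository's CoreNLPThriftUtil:

// Hypothetical helper, not part of the repository: illustrates the new
// text -> parse trees -> tree strings -> named entities flow used above.
import java.util.ArrayList;
import java.util.List;
import org.apache.thrift.TApplicationException;

public class EntityFlowSketch
{
    public static List<NamedEntity> entitiesViaTrees(StanfordParserThrift parser,
                                                     StanfordNERThrift ner,
                                                     String text) throws TApplicationException
    {
        // null output options fall back to the parser's defaults; this is only safe
        // because of the null check fixed in StanfordParserThrift below.
        List<ParseTree> treeObjects = parser.parse_text(text, null);

        List<String> treeStrings = new ArrayList<String>();
        for (ParseTree t : treeObjects)
        {
            treeStrings.add(t.getTree());  // assumed Thrift-generated getter for the tree string
        }
        return ner.getNamedEntitiesFromTrees(treeStrings);
    }
}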
12 changes: 0 additions & 12 deletions src/ner/StanfordNERThrift.java
@@ -44,7 +44,6 @@ public class StanfordNERThrift
* trees.
*/

// private StanfordCoreNLP pipeline;
private NERCombinerAnnotator ner;

public StanfordNERThrift(List<String> nerModels)
@@ -62,17 +61,6 @@ public StanfordNERThrift(List<String> nerModels)
}
}


// public List<NamedEntity> getNamedEntitiesFromText(String text)
// {
// Annotation annotation = new Annotation(text);
// pipeline.annotate(annotation);

// List<CoreMap> sentences = CoreNLPThriftUtil.adjustCharacterOffsets(annotation.get(CoreAnnotations.SentencesAnnotation.class), false);
// return toNamedEntityObjects(sentences);
// }


public List<NamedEntity> getNamedEntitiesFromTrees(List<String> parseTrees)
{
Annotation annotation = CoreNLPThriftUtil.getAnnotationFromParseTrees(parseTrees);
51 changes: 14 additions & 37 deletions src/parser/StanfordParserThrift.java
@@ -57,7 +57,6 @@ public StanfordParserThrift(String modelFile)
{
loadModel(modelFile);
tlp = new PennTreebankLanguagePack();
// treePrinter = new TreePrint("oneline", "", tlp);
}

private String TreeObjectToString(Tree tree)
@@ -80,11 +79,11 @@ private void loadModel(String modelFile)

private void setOptions(List<String> outputOptions) throws Exception
{
String outputFormatStr = "oneline";
String outputFormatStr = "oneline"; // default
String outputFormatOptionsStr = "";

// for output formatting
if (outputOptions.size() > 0 || outputOptions != null)
if (outputOptions != null && outputOptions.size() > 0)
{
int ofIndex = outputOptions.indexOf("-outputFormat");
int ofoIndex = outputOptions.indexOf("-outputFormatOptions");
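Note: the operand reordering above is the "shortest fix ever" from the commit message. With ||, outputOptions.size() is evaluated before the null check, so passing null (as get_entities_from_text now does via parse_text(text, null)) throws a NullPointerException, and for any non-null list the old condition was always true, even when the list was empty. A standalone sketch of the difference, with hypothetical names:

// Standalone illustration of the short-circuit fix; not part of the repository.
import java.util.Collections;
import java.util.List;

public class ShortCircuitDemo
{
    static boolean hasOptionsBuggy(List<String> opts)
    {
        // Old order: opts.size() runs first, so opts == null throws
        // NullPointerException, and a non-null empty list still yields true.
        return opts.size() > 0 || opts != null;
    }

    static boolean hasOptionsFixed(List<String> opts)
    {
        // && short-circuits: opts.size() only runs when opts is non-null.
        return opts != null && opts.size() > 0;
    }

    public static void main(String[] args)
    {
        System.out.println(hasOptionsFixed(null));                           // false
        System.out.println(hasOptionsFixed(Collections.<String>emptyList())); // false
        // hasOptionsBuggy(null) would throw NullPointerException here.
    }
}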
@@ -114,6 +113,12 @@ private void setOptions(List<String> outputOptions) throws Exception
// customParserOptionsSet = true;
// }
}

//public List<ParseTree> parse_text(String text) throws TApplicationException
//{
// List<String> outputFormat = Arrays.asList(DEFAULTOPTIONS);
// return parse_text(text, outputFormat);
//}

public List<ParseTree> parse_text(String text, List<String> outputFormat) throws TApplicationException
{
@@ -140,42 +145,14 @@ public List<ParseTree> parse_text(String text, List<String> outputFormat) throws TApplicationException

return results;
}


/**
* @param tokens One sentence worth of tokens at a time.
* @return A ParseTree object of the String representation of the tree, plus its probability.
* @throws TApplicationException
*/
public ParseTree parse_tokens(List<String> tokens, List<String> outputFormat) throws TApplicationException
{
// List<ParseTree> results = new ArrayList<ParseTree>();

// assume an array of tokens was passed in
// This doesn't seem to be getting used much; the typical case is to pass in one sentence worth of tokens.
// This code here handled the case where you wanted two parse trees, one for each sentence "This is a sentence. It is about cats."
// and you wanted to pass in [["This", "is", "a", "sentence", "."], ["It", "is", "about", "cats", "."]]
// but instead this code is looking for ["This", "is", "a", "sentence", ".", "\n", "It", "is", "about", "cats", "."]
/*if (tokens.contains("\n"))
{
StringBuilder builder = new StringBuilder();
// at least one sentence worth of tokens
for(String token : tokens)
{
builder.append(token+" ");
}
String[] multipleSentences = builder.toString().split("\n");
for (String s : multipleSentences)
{
try
{
List<CoreLabel> crazyStanfordFormat = Sentence.toCoreLabelList(s.trim().split(" "));
Tree parseTree = parser.apply(crazyStanfordFormat);
treePrinter.printTree(parseTree, pw);
results.add(new ParseTree(sw.getBuffer().toString().trim(), parseTree.score()));
}
catch (Exception e)
{
throw new TApplicationException(TApplicationException.INTERNAL_ERROR, e.getMessage());
}
}
}
else
{*/
try
{
setOptions(outputFormat);
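Per the Javadoc and the removed comment block above, parse_tokens expects one sentence's worth of tokens per call rather than several sentences joined with "\n". A hedged usage sketch, with a hypothetical class and variable names, assuming that a null output-format list falls back to the default "oneline" format as in setOptions above and that ParseTree exposes a Thrift-generated getTree() getter:

// Hypothetical usage, not from the repository: one parse_tokens call per sentence.
import java.util.Arrays;
import java.util.List;
import org.apache.thrift.TApplicationException;

public class ParseTokensUsage
{
    public static void parseTwoSentences(StanfordParserThrift parser) throws TApplicationException
    {
        List<String> first  = Arrays.asList("This", "is", "a", "sentence", ".");
        List<String> second = Arrays.asList("It", "is", "about", "cats", ".");

        // With the null handling fixed above, null means "use the default output format".
        ParseTree tree1 = parser.parse_tokens(first, null);
        ParseTree tree2 = parser.parse_tokens(second, null);

        System.out.println(tree1.getTree());  // assumed Thrift-generated getter
        System.out.println(tree2.getTree());
    }
}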
Binary file modified stanford-corenlp-wrapper.jar
