Permalink
Browse files

Now with limited Tregex-ability.

  • Loading branch information...
1 parent 94591b1 commit 0bd2e64e5b406df85e902b915d46e32b0265e353 @dmnapolitano committed Apr 15, 2013
View
@@ -9,6 +9,7 @@ Things you can do with it:
- Parse Trees **See README_parser.md**
- Named Entities **See README_ner.md**
- Resolved Coreferences **See README_coref.md**
+ - Evaluate Stanford Tregex patterns over parse trees **See README_tregex.md**
* Send unicode (optional), receive unicode (always).
* Do these things in a multithreaded way without having to think about it too much (Thrift provides ten threads).
* Communicate with the server using the language of your choice (with some additional coding if your choice isn't "Java" or "Python").
View
@@ -0,0 +1,8 @@
+How to Run Regular Expressions over Parse Trees with Stanford Tregex via this Apache Thrift Server
+==================================================================================================
+
+## How to Interact with the Methods and Data Structures
+
+Presently, there is only one method, `evaluate_tregex_pattern(parse_tree, tregex_pattern)`, where `parse_tree` is a Java `String`/Python `str` or `unicode` containing a single sentence's parse tree (probably the output from the Stanford Parser), and `tregex_pattern` is a Java `String`/Python `str` or `unicode` containing a valid Tregex pattern that you wish to evaluate on this tree.
+The return value is a Java `ArrayList<String>`/Python list of `unicode` strings, where each element is one match of the specified Tregex pattern against the parse tree.
+I'm pretty sure `parse_tree` can be in ANY of the Stanford Parser output formats, although the only one I have tried is the `oneline` format.
View
@@ -32,5 +32,6 @@ service StanfordCoreNLP
list<NamedEntity> get_entities_from_trees(1:list<string> trees),
list<string> resolve_coreferences_in_text(1:string text),
list<string> resolve_coreferences_in_tokenized_sentences(1:list<string> sentencesWithTokensSeparatedBySpace),
- list<string> resolve_coreferences_in_trees(1:list<string> trees)
+ list<string> resolve_coreferences_in_trees(1:list<string> trees),
+ list<string> evaluate_tregex_pattern(1:string parseTree, 2:string tregexPattern)
}
@@ -34,6 +34,7 @@ if len(sys.argv) <= 1 or sys.argv[1] == '--help':
print ' resolve_coreferences_in_text(string text)'
print ' resolve_coreferences_in_tokenized_sentences( sentencesWithTokensSeparatedBySpace)'
print ' resolve_coreferences_in_trees( trees)'
+ print ' evaluate_tregex_pattern(string parseTree, string tregexPattern)'
print ''
sys.exit(0)
@@ -151,6 +152,12 @@ elif cmd == 'resolve_coreferences_in_trees':
sys.exit(1)
pp.pprint(client.resolve_coreferences_in_trees(eval(args[0]),))
+elif cmd == 'evaluate_tregex_pattern':
+ if len(args) != 2:
+ print 'evaluate_tregex_pattern requires 2 args'
+ sys.exit(1)
+ pp.pprint(client.evaluate_tregex_pattern(args[0],args[1],))
+
else:
print 'Unrecognized method %s' % cmd
sys.exit(1)

Some generated files are not rendered by default. Learn more.

Oops, something went wrong.
View
@@ -68,32 +68,32 @@
#outputOptions = ["-outputFormat", "oneline"] # Same as specifying "None", as above.
-#try:
-# parse_trees = client.parse_text(arbitrary_text, outputOptions)
-# for result in parse_trees:
-# sys.stdout.write(result.tree.strip() + " [" + str(result.score) + "]\n")
-#except Exception as e:
-# print e
-
-#print
-
-#for sentence in tokenized_sentences:
-# try:
-# tree = client.parse_tokens(sentence, outputOptions)
-# sys.stdout.write(tree.tree.strip() + " [" + str(tree.score) + "]\n")
-# except Exception as e:
-# print e
-
-#print
-
-#for sentence in more_tokenized_sentences:
-# try:
-# tree = client.parse_tokens(sentence, outputOptions)
-# sys.stdout.write(tree.tree.strip()+"\n")
-# except Exception as e:
-# print e
-
-tree = client.parse_tokens(tokenized_sentence, outputOptions)
+try:
+ parse_trees = client.parse_text(arbitrary_text, outputOptions)
+ for result in parse_trees:
+ sys.stdout.write(result.tree.strip() + " [" + str(result.score) + "]\n")
+except Exception as e:
+ print e
+
+print
+
+for sentence in tokenized_sentences:
+ try:
+ tree = client.parse_tokens(sentence, outputOptions)
+ sys.stdout.write(tree.tree.strip() + " [" + str(tree.score) + "]\n")
+ except Exception as e:
+ print e
+
+print
+
+for sentence in more_tokenized_sentences:
+ try:
+ tree = client.parse_tokens(sentence, outputOptions)
+ sys.stdout.write(tree.tree.strip()+"\n")
+ except Exception as e:
+ print e
+
+tree = client.parse_tokens(tokenized_sentence, None)
sys.stdout.write(tree.tree.strip() + "\n")
tree = client.parse_tagged_sentence(tagged_sentence, outputOptions, "/")
@@ -7,6 +7,7 @@
import edu.stanford.nlp.pipeline.Annotation;
import parser.StanfordParserThrift;
+import tregex.StanfordTregexThrift;
import CoreNLP.*;
@@ -22,6 +23,7 @@
private StanfordParserThrift parser;
private StanfordNERThrift ner;
private StanfordCorefThrift coref;
+ private StanfordTregexThrift tregex;
public StanfordCoreNLPHandler()
{
@@ -31,6 +33,8 @@ public StanfordCoreNLPHandler()
ner = new StanfordNERThrift();
System.err.println("Initializing Coreference Resolver...");
coref = new StanfordCorefThrift();
+ System.err.println("Initializing Tregex...");
+ tregex = new StanfordTregexThrift();
}
@@ -123,6 +127,14 @@ public ParseTree parse_tagged_sentence(String taggedSentence, List<String> outpu
/* End Stanford Coref methods */
+ /* Begin Stanford Tregex methods */
+ public List<String> evaluate_tregex_pattern(String parseTree, String tregexPattern)
+ {
+ return tregex.evaluateTregexPattern(parseTree, tregexPattern);
+ }
+ /* End Stanford Tregex methods */
+
+
public void ping()
{
System.out.println("ping()");
Oops, something went wrong.

0 comments on commit 0bd2e64

Please sign in to comment.