Permalink
Browse files

Refactored to encapsulate references to LexicalizedParser, providing …

…just one getBestParse(String) method, using a standard default configuration for the parser.
  • Loading branch information...
1 parent 454d6cb commit 5ed9f4c8637ddb96e5b0639938a815a980f65233 @jimtyhurst committed Apr 10, 2011
View
3 README
@@ -6,7 +6,8 @@ My goal is to provide access to a parser that can handle simple English sentence
## Usage
-See the unit tests in LexicalizedParserTest for examples.
+See the unit tests in LexicalizedParserTest for examples. For instance, using that test class's getParserModule() helper:
+Tree tree = getParserModule().buildParser().getBestParse("every male student read some book");
## Dependencies
View
63 src/com/tyhurst/stanfordparser/util/DefaultEnglishParser.java
@@ -0,0 +1,63 @@
+package com.tyhurst.stanfordparser.util;
+
+import java.io.StringReader;
+import java.util.List;
+
+import edu.stanford.nlp.ling.Word;
+import edu.stanford.nlp.objectbank.TokenizerFactory;
+import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
+import edu.stanford.nlp.process.PTBTokenizer;
+import edu.stanford.nlp.process.Tokenizer;
+import edu.stanford.nlp.process.WordTokenFactory;
+import edu.stanford.nlp.trees.Tree;
+
+/**
+ * DefaultEnglishParser is a facade for a LexicalizedParser. Its only public
+ * operation is getBestParse(String): the sentence is tokenized with a PTBTokenizer
+ * factory, parsed, and the result is passed through TreeUtil.treeToStringLabeledTree.
+ */
+public class DefaultEnglishParser implements SimpleParser {
+ // parser and treeUtil are supplied by the constructor; tokenizerFactory is created on first use.
+ private LexicalizedParser parser;
+ private TreeUtil treeUtil;
+ private TokenizerFactory<Word> tokenizerFactory;
+ /** Stores the given parser and tree utility; no other work is done here. */
+ public DefaultEnglishParser(LexicalizedParser parser, TreeUtil treeUtil) {
+ this.parser = parser;
+ this.treeUtil = treeUtil;
+ }
+ /** Tokenizes the sentence, parses the tokens, and returns the parser's best parse after conversion by TreeUtil.treeToStringLabeledTree. */
+ public Tree getBestParse(String sentence) {
+ List<Word> tokens = tokenize(sentence);
+ LexicalizedParser parser = getParser(); // local shadows the field; going through getParser() means reset() is called first
+ parser.parse(tokens); // NOTE(review): the value returned by parse(...) is ignored here — confirm whether a failed parse needs handling
+ Tree tree = parser.getBestParse();
+ Tree stringLabeledTree = getTreeUtil().treeToStringLabeledTree(tree);
+ return stringLabeledTree;
+ }
+ /** Builds a tokenizer for the sentence and returns the tokens it produces. */
+ private List<Word> tokenize(String sentence) {
+ return buildTokenizer(sentence).tokenize();
+ }
+ /** Asks the tokenizer factory for a tokenizer that reads the sentence through a StringReader. */
+ private Tokenizer<Word> buildTokenizer(String sentence) {
+ return getTokenizerFactory().getTokenizer(new StringReader(sentence));
+ }
+ /** Calls reset() on the wrapped parser and then returns it; this happens on every call. */
+ private LexicalizedParser getParser() {
+ parser.reset();
+ return parser;
+ }
+ /** Returns the TreeUtil supplied to the constructor. */
+ private TreeUtil getTreeUtil() {
+ return treeUtil;
+ }
+ /** Returns the tokenizer factory, creating it via PTBTokenizer.factory(false, new WordTokenFactory()) the first time and reusing it afterwards; the null check is not synchronized. */
+ private TokenizerFactory<Word> getTokenizerFactory() {
+ if (tokenizerFactory == null) {
+ tokenizerFactory = PTBTokenizer.factory(false, new WordTokenFactory());
+ }
+ return tokenizerFactory;
+ }
+
+}
View
21 src/com/tyhurst/stanfordparser/util/EnglishParserModule.java
@@ -1,29 +1,20 @@
package com.tyhurst.stanfordparser.util;
-import java.io.StringReader;
-
-import edu.stanford.nlp.ling.Word;
-import edu.stanford.nlp.objectbank.TokenizerFactory;
import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
-import edu.stanford.nlp.process.PTBTokenizer;
-import edu.stanford.nlp.process.Tokenizer;
-import edu.stanford.nlp.process.WordTokenFactory;
import edu.stanford.nlp.trees.GrammaticalStructureFactory;
import edu.stanford.nlp.trees.PennTreebankLanguagePack;
import edu.stanford.nlp.trees.TreebankLanguagePack;
-
+/**
+ * Builds utility objects for accessing the Stanford Parser with default configuration values
+ * for an English parser.
+ */
public class EnglishParserModule implements ParserModule {
private static final String PARSER_CONFIGURATION_FILE = "./resources/englishPCFG.ser.gz";
- public LexicalizedParser buildParser() {
- return new LexicalizedParser(PARSER_CONFIGURATION_FILE);
- }
-
- public Tokenizer<Word> buildTokenizer(String sentence) {
- TokenizerFactory<Word> factory = PTBTokenizer.factory(false, new WordTokenFactory());
- return factory.getTokenizer(new StringReader(sentence));
+ public SimpleParser buildParser() {
+ return new DefaultEnglishParser(new LexicalizedParser(PARSER_CONFIGURATION_FILE), buildTreeUtil());
}
public TreeUtil buildTreeUtil() {
View
10 src/com/tyhurst/stanfordparser/util/ParserModule.java
@@ -1,15 +1,13 @@
package com.tyhurst.stanfordparser.util;
-import edu.stanford.nlp.ling.Word;
-import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
-import edu.stanford.nlp.process.Tokenizer;
+/**
+ * Builds utility objects for accessing the Stanford Parser with default configuration values.
+ */
public interface ParserModule {
- LexicalizedParser buildParser();
+ SimpleParser buildParser();
- Tokenizer<Word> buildTokenizer(String sentence);
-
TreeUtil buildTreeUtil();
}
View
9 src/com/tyhurst/stanfordparser/util/SimpleParser.java
@@ -0,0 +1,9 @@
+package com.tyhurst.stanfordparser.util;
+
+import edu.stanford.nlp.trees.Tree;
+
+public interface SimpleParser {
+ /** Parses the given sentence text and returns the resulting Tree; how the "best" parse is chosen is left to the implementation. */
+ Tree getBestParse(String sentence);
+
+}
View
4 src/com/tyhurst/stanfordparser/util/TreeUtil.java
@@ -8,6 +8,10 @@
import edu.stanford.nlp.trees.TreeFunctions;
import edu.stanford.nlp.trees.TypedDependency;
+/**
+ * TreeUtil is a facade for handling Tree objects, using default
+ * configuration values.
+ */
public class TreeUtil {
private GrammaticalStructureFactory grammaticalStructureFactory;
View
75 test/src/com/tyhurst/stanfordparser/LexicalizedParserTest.java
@@ -10,22 +10,27 @@
import com.tyhurst.stanfordparser.util.EnglishParserModule;
import com.tyhurst.stanfordparser.util.ParserModule;
+import com.tyhurst.stanfordparser.util.SimpleParser;
import com.tyhurst.stanfordparser.util.TreeUtil;
-import edu.stanford.nlp.ling.Word;
-import edu.stanford.nlp.parser.lexparser.LexicalizedParser;
import edu.stanford.nlp.trees.Tree;
import edu.stanford.nlp.trees.TypedDependency;
+/**
+ * LexicalizedParserTest tests a SimpleParser for a few basic cases,
+ * where a SimpleParser is a simplified facade to the LexicalizedParser class.
+ */
public class LexicalizedParserTest {
+ private static final String EXPECTED_LABEL_JJ = "JJ";
+ private static final String EXPECTED_LABEL_NP = "NP";
private static final String EXPECTED_LABEL_ROOT = "ROOT";
private static final String EXPECTED_LABEL_S = "S";
- private static final String EXPECTED_LABEL_NP = "NP";
+ private static final String EXPECTED_LABEL_VBP = "VBP";
private static final String EXPECTED_LABEL_VP = "VP";
private static ParserModule parserModule;
- private static LexicalizedParser parser;
+ private static SimpleParser parser;
private static TreeUtil treeUtil;
@BeforeClass
@@ -38,7 +43,7 @@ public static void setup() {
* tree = (ROOT (S (NP (DT every) (JJ male) (NN student)) (VP (VBP read) (NP (DT some) (NN book)))))
*/
@Test public void testSimpleSVO() {
- Tree tree = getBestParse("every male student read some book");
+ Tree tree = getParser().getBestParse("every male student read some book");
assertEquals(EXPECTED_LABEL_ROOT, tree.label().value());
Tree actualSNode = tree.firstChild();
@@ -47,22 +52,37 @@ public static void setup() {
assertEquals(EXPECTED_LABEL_NP, actualNPNode.label().value());
Tree actualVPNode = actualSNode.getChild(1);
assertEquals(EXPECTED_LABEL_VP, actualVPNode.label().value());
+ Tree actualVNode = actualVPNode.getChild(0);
+ assertEquals(EXPECTED_LABEL_VBP, actualVNode.label().value());
}
/**
* Expect:
- * tree = (ROOT (S (NP (DT every) (NN student)) (VP (VBP read) (NP (DT some) (NN book)))))
- * dependencies = [det(student-3, every-1), amod(student-3, male-2), nsubj(read-4, student-3), det(book-6, some-5), dobj(read-4, book-6)]
+ * tree = (ROOT (S (NP (DT some) (JJ female) (NN dog)) (VP (VBZ bites) (NP (DT every) (NN student)))))
+ * dependencies = [det(dog-3, some-1), amod(dog-3, female-2), nsubj(bites-4, dog-3), det(student-6, every-5), dobj(bites-4, student-6)]
*/
@Test public void testSimpleSVODependencies() {
- Tree tree = getBestParse("every male student read some book");
- List<TypedDependency> dependencies = getTreeUtil().getTypedDependencies(tree);
+ Tree tree = getParser().getBestParse("some female dog bites every student");
+ assertEquals(EXPECTED_LABEL_ROOT, tree.label().value());
+ Tree actualSNode = tree.firstChild();
+ assertEquals(EXPECTED_LABEL_S, actualSNode.label().value());
+ Tree actualNPNode = actualSNode.getChild(0);
+ assertEquals(EXPECTED_LABEL_NP, actualNPNode.label().value());
+ Tree actualJJNode = actualNPNode.getChild(1);
+ assertEquals(EXPECTED_LABEL_JJ, actualJJNode.label().value());
+ Tree actualVPNode = actualSNode.getChild(1);
+ assertEquals(EXPECTED_LABEL_VP, actualVPNode.label().value());
+ List<TypedDependency> dependencies = getTreeUtil().getTypedDependencies(tree);
assertTrue(dependencies.size() == 5);
TypedDependency detDependency = dependencies.get(0);
assertEquals("det", detDependency.reln().getShortName());
- assertEquals("every-1", detDependency.dep().toString());
- assertEquals("student-3", detDependency.gov().toString());
+ assertEquals("some-1", detDependency.dep().toString());
+ assertEquals("dog-3", detDependency.gov().toString());
+ TypedDependency dobjDependency = dependencies.get(dependencies.size() - 1);
+ assertEquals("dobj", dobjDependency.reln().getShortName());
+ assertEquals("student-6", dobjDependency.dep().toString());
+ assertEquals("bites-4", dobjDependency.gov().toString());
}
/**
@@ -71,34 +91,33 @@ public static void setup() {
* dependencies = [det(student-3, every-1), amod(student-3, male-2), nsubj(read-4, student-3), nsubj(kissed-8, student-3), det(book-6, some-5), dobj(read-4, book-6), conj_and(read-4, kissed-8), det(girl-10, a-9), dobj(kissed-8, girl-10)]
*/
@Test public void testSVODependenciesWithConjunction() {
- Tree tree = getBestParse("every male student read some book and kissed a girl");
- List<TypedDependency> dependencies = getTreeUtil().getTypedDependencies(tree);
+ Tree tree = getParser().getBestParse("every male student read some book and kissed a girl");
+ assertEquals(EXPECTED_LABEL_ROOT, tree.label().value());
+ Tree actualSNode = tree.firstChild();
+ assertEquals(EXPECTED_LABEL_S, actualSNode.label().value());
+ Tree actualNPNode = actualSNode.getChild(0);
+ assertEquals(EXPECTED_LABEL_NP, actualNPNode.label().value());
+ Tree actualJJNode = actualNPNode.getChild(1);
+ assertEquals(EXPECTED_LABEL_JJ, actualJJNode.label().value());
+ Tree actualVPNode = actualSNode.getChild(1);
+ assertEquals(EXPECTED_LABEL_VP, actualVPNode.label().value());
+ List<TypedDependency> dependencies = getTreeUtil().getTypedDependencies(tree);
assertTrue(dependencies.size() == 9);
TypedDependency detDependency = dependencies.get(0);
assertEquals("det", detDependency.reln().getShortName());
assertEquals("every-1", detDependency.dep().toString());
assertEquals("student-3", detDependency.gov().toString());
+ TypedDependency dobjDependency = dependencies.get(dependencies.size() - 1);
+ assertEquals("dobj", dobjDependency.reln().getShortName());
+ assertEquals("girl-10", dobjDependency.dep().toString());
+ assertEquals("kissed-8", dobjDependency.gov().toString());
}
- private Tree getBestParse(String sentence) {
- List<Word> tokens = tokenize(sentence);
- LexicalizedParser parser = getParser();
- parser.parse(tokens);
- Tree tree = parser.getBestParse();
- Tree stringLabeledTree = getTreeUtil().treeToStringLabeledTree(tree);
- return stringLabeledTree;
- }
-
- private List<Word> tokenize(String sentence) {
- return getParserModule().buildTokenizer(sentence).tokenize();
- }
-
- private LexicalizedParser getParser() {
+ private SimpleParser getParser() {
if (parser == null) {
parser = getParserModule().buildParser();
}
- parser.reset();
return parser;
}

0 comments on commit 5ed9f4c

Please sign in to comment.