Add CoreNLP server as main parser

Michael Young · Michael Young · commit 24f8ac44f291 · 2016-08-08T01:02:02.000-07:00
diff --git a/.gitignore b/.gitignore
@@ -1,5 +1,6 @@
 .DS_Store
 stanford-parser*
+stanford-corenlp*
 build*
 dist*
 Lango.egg-info*
diff --git a/LICENSE.txt b/LICENSE.txt
diff --git a/docs.md b/docs.md
@@ -0,0 +1,16 @@
+# Docs
+
+Pip Installs
+```
+sphinx-autobuild==0.6.0
+sphinx-rtd-theme==0.1.9
+sphinxcontrib-napoleon==0.5.0
+```
+
+Generate docs
+```
+sphinx-apidoc -f -e -o docs lango
+cd docs
+make html
+```
+
diff --git a/docs/installation.rst b/docs/installation.rst
@@ -8,23 +8,22 @@ Install package with pip
 
     pip install lango
 
-Download Stanford Models and Parser
+Download Stanford CoreNLP
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Make sure you have Java installed for the Stanford parser to work.
+Make sure you have Java installed for Stanford CoreNLP to work.
 
-`Download Stanford Parser`_
+`Download Stanford CoreNLP`_
 
-Set Environment Variables
+Extract the archive to any folder.
+
+Run Server
 ~~~~~~~~~~~~~~~~~~~~~~~~~
 
-Set environment variables for STANFORD\_PARSER and STANFORD\_MODELS to
-where you downloaded the parser.
+In the extracted folder, run the following command to start the server:
 
-.. code:: python
+::
 
-    import os
-    os.environ['STANFORD_PARSER'] = 'stanford-parser-full-2015-12-09'
-    os.environ['STANFORD_MODELS'] = 'stanford-parser-full-2015-12-09'
+    java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer
 
-.. _Download Stanford Parser: http://nlp.stanford.edu/software/stanford-parser-full-2015-12-09.zip
+.. _Download Stanford CoreNLP: http://stanfordnlp.github.io/CoreNLP/#download
diff --git a/examples/matching.py b/examples/matching.py
@@ -1,13 +1,10 @@
 from collections import OrderedDict
 import os
-from lango.parser import StanfordLibParser
+from lango.parser import StanfordServerParser
 from lango.matcher import match_rules
 
 
-os.environ['STANFORD_PARSER'] = 'stanford-parser-full-2015-12-09'
-os.environ['STANFORD_MODELS'] = 'stanford-parser-full-2015-12-09'
-
-parser = StanfordLibParser()
+parser = StanfordServerParser()
 
 sents = [
     'Call me an Uber.',
diff --git a/examples/parser_input.py b/examples/parser_input.py
@@ -1,12 +1,9 @@
 import os
-from lango.parser import StanfordLibParser
+from lango.parser import StanfordServerParser
 from lango.matcher import match_rules
 
-os.environ['STANFORD_PARSER'] = 'stanford-parser-full-2015-12-09'
-os.environ['STANFORD_MODELS'] = 'stanford-parser-full-2015-12-09'
-
 def main():
-    parser = StanfordLibParser()
+    parser = StanfordServerParser()
     while True:
         try:
             line = raw_input("Enter line: ")
diff --git a/lango/parser.py b/lango/parser.py
@@ -1,5 +1,7 @@
-from nltk.parse.stanford import StanfordParser
+from nltk.parse.stanford import StanfordParser, GenericStanfordParser
 from nltk.internals import find_jars_within_path
+from nltk.tree import Tree
+from pycorenlp import StanfordCoreNLP
 
 
 class Parser:
@@ -25,6 +27,7 @@ def parse(self, line):
 
         Returns:
             Tree object representing parsed sentence
+            None if parse fails
         """
         tree = list(self.parser.raw_parse(line))[0]
         tree = tree[0]
@@ -37,4 +40,28 @@ def __init__(self):
         self.parser = StanfordParser(
             model_path='edu/stanford/nlp/models/lexparser/englishPCFG.ser.gz')
         stanford_dir = self.parser._classpath[0].rpartition('/')[0]
-        self.parser._classpath = tuple(find_jars_within_path(stanford_dir))
+        self.parser._classpath = tuple(find_jars_within_path(stanford_dir))
+
+
+class StanfordServerParser(Parser, GenericStanfordParser):
+    """Follow the readme to set up the Stanford CoreNLP server."""
+    def __init__(self, host='localhost', port=9000):
+        url = 'http://{0}:{1}'.format(host, port)
+        self.nlp = StanfordCoreNLP(url)
+
+    def _make_tree(self, result):
+        return Tree.fromstring(result)
+
+    def parse(self, sent):
+        output = self.nlp.annotate(sent, properties={
+            'annotators': 'parse',
+            'outputFormat': 'json'
+        })
+
+        # Got a raw string (e.g. HTML) instead of JSON; return an empty tree
+        if isinstance(output, unicode):
+            return Tree('', [])
+
+        parse_output = output['sentences'][0]['parse'] + '\n\n'
+        tree = next(next(self._parse_trees_output(parse_output)))[0]
+        return tree
diff --git a/readme.md b/readme.md
@@ -13,21 +13,19 @@ Lango is a natural language processing library for working with the building blo
 pip install lango
 ```
 
-### Download Stanford Models and Parser
+### Download Stanford CoreNLP
 
-Make sure you have Java installed for the Stanford parser to work.
+Make sure you have Java installed for Stanford CoreNLP to work.
 
-[Download Stanford Parser](http://nlp.stanford.edu/software/stanford-parser-full-2015-12-09.zip)
+[Download Stanford CoreNLP](http://stanfordnlp.github.io/CoreNLP/#download)
 
-### Set Environment Variables
+Extract the archive to any folder.
 
-Set environment variables for STANFORD_PARSER and STANFORD_MODELS to where you
-downloaded the parser.
+### Run the Stanford CoreNLP server
 
-```python
-import os
-os.environ['STANFORD_PARSER'] = 'stanford-parser-full-2015-12-09'
-os.environ['STANFORD_MODELS'] = 'stanford-parser-full-2015-12-09'
+Run the following command in the folder where you extracted Stanford CoreNLP:
+```
+java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer
 ```
 
 ## Docs
@@ -218,10 +216,10 @@ Returned context:
 Full code:
 
 ```python
-from lango.parser import StanfordLibParser
+from lango.parser import StanfordServerParser
 from lango.matcher import match_rules
 
-parser = StanfordLibParser()
+parser = StanfordServerParser()
 
 rules = {
   '( S ( NP:np ) ( VP ( VBD:action-o ) ( PP:pp ) ) )': {
diff --git a/requirements.txt b/requirements.txt
@@ -1,4 +1,2 @@
 nltk==3.1
-sphinx-autobuild==0.6.0
-sphinx-rtd-theme==0.1.9
-sphinxcontrib-napoleon==0.5.0
+pycorenlp==0.3.0
diff --git a/setup.py b/setup.py
@@ -2,7 +2,7 @@
 
 setup(
     name='Lango',
-    version='0.11',
+    version='0.12',
     description='Natural Language Framework for Matching Parse Trees and Modeling Conversation',
     packages=find_packages(),
     author='Michael Young',