Merge pull request #1 from abhaga/master

Fix for the skipping of non-alphanumeric words in a sentence
dasmith · Oct 10, 2011 · 14b6306 · 14b6306
2 parents 29ee872 + 8ed7640
commit 14b6306
Showing 1 changed file with 3 additions and 2 deletions.
diff --git a/corenlp.py b/corenlp.py
@@ -48,9 +48,10 @@ def parse_parser_results(text):
                 print line
                 raise Exception("Parse error. Could not find [Text=")
             tmp['words'] = [] 
-            exp = re.compile('\[([a-zA-Z0-9=. ]+)\]')
+            exp = re.compile('\[([^\]]+)\]')
             matches  = exp.findall(line)
             for s in matches:
+                print s
                 # split into attribute-value list 
                 av = re.split("=| ", s) 
                 # make [ignore,ignore,a,b,c,d] into [[a,b],[c,d]]
@@ -137,7 +138,7 @@ def __init__(self):
                 sys.exit(1)
 
         # spawn the server
-        self._server = pexpect.spawn("%s -Xmx3g -cp %s %s %s" % (java_path, ':'.join(jars), classname, props))
+        self._server = pexpect.spawn("%s -Xmx1800m -cp %s %s %s" % (java_path, ':'.join(jars), classname, props))
 
         print "Starting the Stanford Core NLP parser."
         self.state = "plays hard to get, smiles from time to time"