Permalink
Browse files

Many major changes happening... now does parsing and named entity recognition. :)
  • Loading branch information...
dmnapolitano committed Mar 12, 2013
1 parent 8cbf8bd commit 95a9583542a158f16943e38be58fbc9aadb59100
View
@@ -1,6 +1,6 @@
-<project name="StanfordParser" default="main" basedir=".">
+<project name="StanfordCoreNLP" default="main" basedir=".">
- <description>Stanford Parser Wrapper</description>
+ <description>Stanford CoreNLP Wrapper</description>
<property name="src" location="src" />
<property name="genjava" location="gen-java" />
@@ -11,8 +11,8 @@
<fileset dir="/home/nlp-text/dynamic/NLPTools/thrift">
<include name="*.jar" />
</fileset>
- <fileset dir="/home/nlp-text/dynamic/NLPTools/stanford-parser">
- <include name="stanford-parser.jar" />
+ <fileset dir="/home/nlp-text/dynamic/NLPTools/stanford-core">
+ <include name="stanford-corenlp-1.3.4.jar" />
</fileset>
</path>
<path id="build.classpath">
@@ -36,10 +36,10 @@
<target name="generate">
<!-- Generate the thrift gen-java and gen-py source -->
<exec executable="/home/nlp-text/dynamic/NLPTools/thrift/bin/thrift" failonerror="true">
- <arg line="--gen java parser.thrift"/>
+ <arg line="--gen java corenlp.thrift"/>
</exec>
<exec executable="/home/nlp-text/dynamic/NLPTools/thrift/bin/thrift" failonerror="true">
- <arg line="--gen py:utf8strings,slots,new_style parser.thrift"/>
+ <arg line="--gen py:utf8strings,slots,new_style corenlp.thrift"/>
</exec>
</target>
@@ -49,14 +49,14 @@
</target>
<target name="main" description="Run" depends="compile">
- <jar jarfile="stanford-parser-wrapper.jar" basedir="${build}"/>
+ <jar jarfile="stanford-corenlp-wrapper.jar" basedir="${build}"/>
</target>
<target name="clean">
<delete dir="${genjava}" />
<delete dir="${genpy}" />
<delete dir="${build}" />
- <delete file="stanford-parser-wrapper.jar" />
+ <delete file="stanford-corenlp-wrapper.jar" />
</target>
</project>
View
@@ -0,0 +1,31 @@
+namespace java CoreNLP
+namespace py corenlp
+
+struct ParseTree
+{
+ 1:string tree,
+ 2:double score
+}
+
+struct NamedEntity
+{
+ 1:string entity,
+ 2:string tag,
+ 3:i32 startOffset,
+ 4:i32 endOffset
+}
+
+exception SerializedException
+{
+ 1: required binary payload
+}
+
+service StanfordCoreNLP
+{
+ void ping(),
+ list<ParseTree> parse_text(1:string text, 2:list<string> outputFormat),
+ ParseTree parse_tokens(1:list<string> tokens, 2:list<string> outputFormat),
+ oneway void zip(),
+ list<NamedEntity> getNamedEntitiesFromText(1:string text),
+ list<NamedEntity> getNamedEntitiesFromTrees(1:list<string> trees)
+}
@@ -1,109 +0,0 @@
-#!/usr/bin/env python
-#
-# Autogenerated by Thrift Compiler (0.9.0)
-#
-# DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING
-#
-# options string: py:utf8strings,slots,new_style
-#
-
-import sys
-import pprint
-from urlparse import urlparse
-from thrift.transport import TTransport
-from thrift.transport import TSocket
-from thrift.transport import THttpClient
-from thrift.protocol import TBinaryProtocol
-
-import StanfordParser
-from ttypes import *
-
-if len(sys.argv) <= 1 or sys.argv[1] == '--help':
- print ''
- print 'Usage: ' + sys.argv[0] + ' [-h host[:port]] [-u url] [-f[ramed]] function [arg1 [arg2...]]'
- print ''
- print 'Functions:'
- print ' void ping()'
- print ' parse_text(string text, outputFormat)'
- print ' ParseTree parse_tokens( tokens, outputFormat)'
- print ' void zip()'
- print ''
- sys.exit(0)
-
-pp = pprint.PrettyPrinter(indent = 2)
-host = 'localhost'
-port = 9090
-uri = ''
-framed = False
-http = False
-argi = 1
-
-if sys.argv[argi] == '-h':
- parts = sys.argv[argi+1].split(':')
- host = parts[0]
- if len(parts) > 1:
- port = int(parts[1])
- argi += 2
-
-if sys.argv[argi] == '-u':
- url = urlparse(sys.argv[argi+1])
- parts = url[1].split(':')
- host = parts[0]
- if len(parts) > 1:
- port = int(parts[1])
- else:
- port = 80
- uri = url[2]
- if url[4]:
- uri += '?%s' % url[4]
- http = True
- argi += 2
-
-if sys.argv[argi] == '-f' or sys.argv[argi] == '-framed':
- framed = True
- argi += 1
-
-cmd = sys.argv[argi]
-args = sys.argv[argi+1:]
-
-if http:
- transport = THttpClient.THttpClient(host, port, uri)
-else:
- socket = TSocket.TSocket(host, port)
- if framed:
- transport = TTransport.TFramedTransport(socket)
- else:
- transport = TTransport.TBufferedTransport(socket)
-protocol = TBinaryProtocol.TBinaryProtocol(transport)
-client = StanfordParser.Client(protocol)
-transport.open()
-
-if cmd == 'ping':
- if len(args) != 0:
- print 'ping requires 0 args'
- sys.exit(1)
- pp.pprint(client.ping())
-
-elif cmd == 'parse_text':
- if len(args) != 2:
- print 'parse_text requires 2 args'
- sys.exit(1)
- pp.pprint(client.parse_text(args[0],eval(args[1]),))
-
-elif cmd == 'parse_tokens':
- if len(args) != 2:
- print 'parse_tokens requires 2 args'
- sys.exit(1)
- pp.pprint(client.parse_tokens(eval(args[0]),eval(args[1]),))
-
-elif cmd == 'zip':
- if len(args) != 0:
- print 'zip requires 0 args'
- sys.exit(1)
- pp.pprint(client.zip())
-
-else:
- print 'Unrecognized method %s' % cmd
- sys.exit(1)
-
-transport.close()
Oops, something went wrong.

0 comments on commit 95a9583

Please sign in to comment.