# Parser
Parse a sentence such that it can be reformulated as a base answer and used to query document

In [1]:
import nltk
import os
from nltk.parse import CoreNLPParser
import subprocess
  

In [8]:
sentence = "Gyarados (ギャラドス, Gyaradosu,  or ) is a Pokémon species in Nintendo and Game Freak's Pokémon franchise. Created by Ken Sugimori, Gyarados first appeared in the video games Pokémon Red and Pokemon Green and subsequent sequels, later appearing in various merchandise, spinoff titles and animated and printed adaptations of the franchise."

https://github.com/nltk/nltk/wiki/Stanford-CoreNLP-API-in-NLTK

In the directory with the Stanford CoreNLP run the following command to start the server at port 9000

java -mx4g -cp "*" edu.stanford.nlp.pipeline.StanfordCoreNLPServer \
-preload tokenize,ssplit,pos,lemma,ner,parse,depparse \
-status_port 9000 -port 9000 -timeout 15000 &

It is connected when the final line is:
[main] INFO CoreNLP - StanfordCoreNLPServer listening at /0:0:0:0:0:0:0:0:port_#_here

Remember to STOP the server when finished (then kill job)

NOTE: Only one parser object can be attached to a port at a time
NOTE: This method does not require English Jar download from https://stanfordnlp.github.io/CoreNLP/download.html
NOTE: If the thread did not shutdown properly, you may have to locate the process named "java" to shut it down manually

In [9]:
# Lexical Parser
parser = CoreNLPParser(url='http://localhost:9000')

In [5]:
result_split = list(parser.parse(sentence.split()))
print(result_split)
result_tokenize = list(parser.parse(parser.tokenize(sentence)))
print(result_tokenize)
print(result_split == result_tokenize)

[Tree('ROOT', [Tree('S', [Tree('NP', [Tree('NP', [Tree('NN', ['Gyarados'])]), Tree('PRN', [Tree('-LRB-', ['-LRB-']), Tree('NP', [Tree('NP', [Tree('CD', ['ギャラドス'])]), Tree(',', [',']), Tree('NP', [Tree('NNP', ['Gyaradosu'])]), Tree(',', [',']), Tree('CC', ['or']), Tree('-RRB-', ['-RRB-'])])])]), Tree('VP', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('NP', [Tree('DT', ['a']), Tree('NNP', ['Pokémon']), Tree('NNS', ['species'])]), Tree('PP', [Tree('IN', ['in']), Tree('NP', [Tree('NNP', ['Nintendo']), Tree('CC', ['and']), Tree('NNP', ['Game'])])])]), Tree('SBAR', [Tree('S', [Tree('NP', [Tree('NP', [Tree('NNP', ['Freak']), Tree('POS', ["'s"])]), Tree('NNP', ['Pokémon']), Tree('NN', ['franchise']), Tree('.', ['.'])]), Tree('VP', [Tree('VBN', ['Created']), Tree('PP', [Tree('IN', ['by']), Tree('NP', [Tree('NP', [Tree('NNP', ['Ken']), Tree('NNP', ['Sugimori'])]), Tree(',', [',']), Tree('S', [Tree('NP', [Tree('NNP', ['Gyarados'])]), Tree('ADVP', [Tree('RB', ['first'])]), Tree('VP', [Tree('

# Attempt Using nltk.parse.stanford

Download stanford-parser-full-2018-10-17

https://janakiev.com/blog/python-shell-commands/

https://stackoverflow.com/questions/40820140/how-to-get-englishpcfg-ser-gz-as-a-single-file

https://stackoverflow.com/questions/46407837/nltk-was-unable-to-find-stanford-parser-jar-set-the-classpath-environment-vari

https://stanfordnlp.github.io/CoreNLP/download.html

https://docs.huihoo.com/nltk/3.2/api/nltk.parse.html

Mentions not to use this method
https://github.com/nltk/nltk/issues/2344 
Will be deprecated soon (I also ran into the error that started the thread)

Code
#https://stackoverflow.com/questions/41522476/stanford-parser-for-python-output-format
#https://nlp.stanford.edu/software/lex-parser.shtml
os.environ['STANFORD_PARSER'] = 'stanford-parser-full-2018-10-17'
os.environ['STANFORD_MODELS'] = 'stanford-parser-full-2018-10-17'
parser = GenericStanfordParser(model_path="englishPCFG.ser.gz")
sentences = parser.raw_parse_sents(("Hello, My name is Melroy.", "What is your name?"))
print(sentences)

#GUI
for line in sentences:
    for sentence in line:
        sentence.draw()

# Starting and stopping the CoreNLP process within python

https://stackoverflow.com/questions/4789837/how-to-terminate-a-python-subprocess-launched-with-shell-true

https://janakiev.com/blog/python-shell-commands/

https://docs.python.org/2/library/subprocess.html

https://stackoverflow.com/questions/24387451/how-can-i-kill-whatever-process-is-using-port-8080-so-that-i-can-vagrant-up/24388281

https://stackoverflow.com/questions/34341336/oserror-errno-1-operation-not-permitted-python

https://stackoverflow.com/questions/20291233/to-get-parent-and-childprocess-id-from-process-id-in-python/20291892

https://psutil.readthedocs.io/en/latest/

https://askubuntu.com/questions/98377/how-to-hide-terminal-output-when-executing-a-command

https://stackoverflow.com/questions/4789837/how-to-terminate-a-python-subprocess-launched-with-shell-true <-- Had permission problems with this however

In [24]:
#Potential work with query reformulation
#See if this is more useful with that
query1 = "What is Gyrados most similar in appearance to?"
query2 = "Where is Gyrados first seen?" #This requires noting "appeared" and "seen" would be synonyms
query3 = "Where is a red Gyarados found in Pokémon Gold?"
query4 = "What franchise is Gyarados in?" #Note: the word "franchise" is lost in the regex, but would still be used in the query
query5 = "Who is Gyarados voiced by?"
query6 = "What is Gyarados known as?"
query7 = "How is the growth of Gyarados from Pokémon Diamond and Pearl to Pokémon Black and White described by IGN?"
#Note: Question could not start with wh-question word - eg. "Gyarados is voiced by who?"

In [23]:
#Yes/No questions
query8 = "Is Gyarados voiced by Unshō Ishizuka?"
query9 = "does Gyarados have four white fins down its back?"
#TODO: If "Gyarados have four white fins down its back? is considered a valid question, then would register as a S also and would have to add in order to do necessary rearranging"
query10 = "Was Gyarados seen in Pokémon Shipwreck?"
query11 = "Has Gyarados been shown in the Magikarp Festival"
query12 = "Is Gyarados in a franchise?"

In [25]:
#Meanings of tags
#https://stackoverflow.com/questions/13027908/stanford-parser-tags
#https://gist.github.com/nlothian/9240750
#Generally yes/no = SQ after Root and wh = SBARQ after Root, but there are some exceptions (eg. query6 being S)

queries = [query1, query2, query5, query7, query8, query9, query10, query11]
count = 1
result_tokenize = []
for q in queries:
    print(count)
    count += 1
    output = list(parser.parse(parser.tokenize(q)))
    result_tokenize.append(output)
    print(output)
    print()

1
[Tree('ROOT', [Tree('SBARQ', [Tree('WHNP', [Tree('WP', ['What'])]), Tree('SQ', [Tree('VBZ', ['is']), Tree('NP', [Tree('NNP', ['Gyrados'])]), Tree('ADJP', [Tree('RBS', ['most']), Tree('JJ', ['similar']), Tree('PP', [Tree('IN', ['in']), Tree('NP', [Tree('NN', ['appearance'])])]), Tree('TO', ['to'])])]), Tree('.', ['?'])])])]

2
[Tree('ROOT', [Tree('SBARQ', [Tree('WHADVP', [Tree('WRB', ['Where'])]), Tree('SQ', [Tree('VBZ', ['is']), Tree('NP', [Tree('NNP', ['Gyrados'])]), Tree('VP', [Tree('ADVP', [Tree('RB', ['first'])]), Tree('VBN', ['seen'])])]), Tree('.', ['?'])])])]

3
[Tree('ROOT', [Tree('SBARQ', [Tree('WHNP', [Tree('WP', ['Who'])]), Tree('SQ', [Tree('VBZ', ['is']), Tree('NP', [Tree('NNP', ['Gyarados'])]), Tree('VP', [Tree('VBN', ['voiced']), Tree('PP', [Tree('IN', ['by'])])])]), Tree('.', ['?'])])])]

4
[Tree('ROOT', [Tree('SBARQ', [Tree('WHADVP', [Tree('WRB', ['How'])]), Tree('SQ', [Tree('VBZ', ['is']), Tree('NP', [Tree('NP', [Tree('DT', ['the']), Tree('NN', ['growth'])]), Tree('P

In [None]:
#TODO: Recreate "is" reformulation from regex with parse instead

In [None]:
#TODO: Use vectorize reformulated question (with and without function words removed as separate tests) and determine most likely sentence by taking cosine similarity with vectorized sentences

In [14]:
# Lexical Parser
parser = CoreNLPParser(url='http://localhost:9000')

In [15]:

os.system("cd stanford-corenlp-full-2018-10-05; java -mx4g -cp '*' edu.stanford.nlp.pipeline.StanfordCoreNLPServer \
-preload tokenize,ssplit,pos,lemma,ner,parse,depparse \
-status_port 9000 -port 9000 -timeout 15000 &")

result_split = list(parser.parse(sentence.split()))
print(result_split)
result_tokenize = list(parser.parse(parser.tokenize(sentence)))
print(result_tokenize)
print(result_split == result_tokenize)

ConnectionError: HTTPConnectionPool(host='localhost', port=9001): Max retries exceeded with url: /?properties=%7B%22outputFormat%22%3A+%22json%22%2C+%22annotators%22%3A+%22tokenize%2Cpos%2Clemma%2Cssplit%2Cparse%22%2C+%22ssplit.eolonly%22%3A+%22true%22%7D (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x119113490>: Failed to establish a new connection: [Errno 61] Connection refused'))