# Input Parsing with `spaCy`
Let's look at some methods for parsing and extracting meaning from user input, using the `spaCy` package.

In [1]:
import spacy
from spacy import displacy

In [2]:
nlp = spacy.load('en_core_web_sm')

One of the things we need to be able to distinguish is whether the user input is a question, which requires our chatbot to answer, or a statement, which requires our chatbot to ask a question.  
I think this is information that we can get using dependency parsing.

In [3]:
qdoc = nlp(u"How can I add a new line to my cell phone plan")

In [4]:
displacy.render(qdoc, style='dep', options={'compact': True})

In [5]:
# For each noun chunk in the question, print the chunk text, its root token,
# the root's dependency label, and the text of the root's head token.
for chunk in qdoc.noun_chunks:
    print(chunk.text, chunk.root.text, chunk.root.dep_,
            chunk.root.head.text)

In [6]:
# For each token in the question, print its text, its dependency label, its
# head's text and part-of-speech, and the list of its children.
for token in qdoc:
    print(token.text, token.dep_, token.head.text, token.head.pos_,
            [child for child in token.children])

In [7]:
[token for token in qdoc[3].subtree]

In [8]:
[token.is_stop for token in qdoc]

In [10]:
[token.lemma_ for token in qdoc if not token.is_stop]

In [11]:
spacy.explain('advmod')

In [12]:
sdoc = nlp(u"I know how to add a new line to my cell phone plan")

In [13]:
displacy.render(sdoc, style='dep', options={'compact': True})

In [14]:
# Same noun-chunk listing as for the question, this time for the statement:
# chunk text, root token, root dependency label, and the root's head text.
for chunk in sdoc.noun_chunks:
    print(chunk.text, chunk.root.text, chunk.root.dep_,
            chunk.root.head.text)

In [15]:
# Per-token listing for the statement: text, dependency label, head text,
# head part-of-speech, and the list of the token's children.
for token in sdoc:
    print(token.text, token.dep_, token.head.text, token.head.pos_,
            [child for child in token.children])

### Some Other Tests

In [16]:
d1 = nlp("The man bit the dog.")

In [17]:
d2 = nlp("The dog bit the man.")

In [18]:
displacy.render(d1, style='dep', options={'compact': True})

In [19]:
displacy.render(d2, style='dep', options={'compact': True})

## Using Parsing in a Chatbot

In [20]:
doc = nlp("How did you get here?")

In [21]:
# Print each token's text, dependency label and part-of-speech so the
# structure of the example question can be inspected.
for token in doc:
    print(token.text, token.dep_, token.pos_)

In [22]:
spacy.explain('nsubj')

In [23]:
d2 = nlp("Is this the best option?")

In [24]:
from spacy.symbols import nsubj, aux

Here is a good resource on the rules for forming questions in English:
https://www.englishclub.com/grammar/questions.htm

In [25]:
def user_question(user_input):
    """Return True when ``user_input`` looks like a question.

    The text is parsed with the module-level ``nlp`` pipeline and three
    heuristics are tried in order:

    1. Wh-question: the first word is a wh-word ("what", "how", ...).
    2. The first word is "is", "was" or "do".
    3. Yes/no question by inversion: the main verb, or one of its
       auxiliaries, comes before the main-clause subject
       ("Can I know ...", "Is this the best option?").

    A trailing question mark is deliberately not used as evidence, so a
    statement such as "This is the best option?" is not treated as a
    question.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        bool: True if any heuristic matches, otherwise False. Empty or
        punctuation-only input returns False.

    NOTE(review): rule 3 depends on the dependency labels the loaded model
    assigns, so its accuracy is bounded by the parser's.
    """
    wh_words = {'what', 'where', 'when', 'which', 'who', 'whom', 'whose',
                'why', 'how'}
    leading_verbs = {'is', 'was', 'do'}

    doc = nlp(user_input)

    # Only the first real word matters for rules 1 and 2; skip any leading
    # punctuation or whitespace tokens.
    first = next(
        (tok for tok in doc if not (tok.is_punct or tok.is_space)), None)
    if first is None:
        return False
    if first.lower_ in wh_words or first.lower_ in leading_verbs:
        return True

    for tok in doc:
        if tok.dep_ not in ('nsubj', 'nsubjpass'):
            continue
        head = tok.head
        # Only look at subjects attached to a root (a token that is its own
        # head), so subjects of embedded clauses ("Tell me what you know")
        # do not trigger the rule.
        if head.head.i != head.i:
            continue
        # Main verb before its subject: "Were you gone?"
        if head.i < tok.i:
            return True
        # Auxiliary of the main verb before the subject: "Can I know ...?"
        if any(child.dep_ in ('aux', 'auxpass') and child.i < tok.i
               for child in head.children):
            return True
    return False

In [26]:
user_question("is this the place?")

In [27]:
user_question('is this money')

In [28]:
user_question("what is a car")

In [29]:
user_question("why can you understand me?")

In [30]:
user_question("Can I know if this is the best option?")

In [31]:
user_question("Give me?")

In [32]:
user_question("Is this the best option?")

In [33]:
user_question("This is the best option?")

In [34]:
user_question("what can you understand")

In [35]:
for token in nlp("what is a car"):
    print(token.text, token.dep_, token.tag_)

In [36]:
for token in nlp("what can you understand?"):
    print(token.text, token.dep_, token.tag_)

In [37]:
# Print the index of every token in d2 whose dependency label is nsubj.
for tok in d2:
    if tok.dep == nsubj:
        print(tok.i)

In [43]:
# sentence_dep("Are you?")

In [44]:
for token in d2:
    print(token.text, token.dep_)

In [45]:
d3 = nlp("This is the best option.")

In [46]:
for token in d3:
    print(token.text, token.dep_)

In [47]:
for token in nlp('what is your name'):
    print(token.text, token.dep_)

## Forming Response
One thing we need to understand is the tense that we should be responding in. How can we tell what tense the user input is?

In [48]:
pdoc = nlp(u"I go")

In [49]:
# Print the fine-grained tag of each token; these tags are what the tense
# detection below keys on.
for tok in pdoc:
    print(tok.tag_)
    #print(nlp.vocab.morphology.tag_map[tok.tag_])

In [50]:
def get_tense(uinput):
    """Guess the overall tense of ``uinput`` from its fine-grained tags.

    The text is parsed with the module-level ``nlp`` pipeline and each
    token's ``tag_`` is inspected. Past takes precedence over present.

    Args:
        uinput: Raw text typed by the user.

    Returns:
        str: 'PAST' if any token is tagged VBD or VBN; otherwise 'PRESENT'
        if any token is tagged VBG, VBP or VBZ; otherwise 'UNKNOWN'.
    """
    past_tags = {'VBD', 'VBN'}
    present_tags = {'VBG', 'VBP', 'VBZ'}

    saw_present = False
    for tok in nlp(uinput):
        if tok.tag_ in past_tags:
            # One past-tagged token decides the result, so stop scanning.
            return 'PAST'
        if tok.tag_ in present_tags:
            saw_present = True
    return 'PRESENT' if saw_present else 'UNKNOWN'

In [51]:
get_tense('were you gone?')

In [52]:
hash('this')