In [7]:
# Shell escape: download and install spaCy's large English model (en_core_web_lg)
# into the environment this notebook runs in.
!python -m spacy download en_core_web_lg

[38;5;2m✔ Download and installation successful[0m
You can now load the model via spacy.load('en_core_web_lg')


In [0]:
import spacy
import requests
import datetime
import json
# Load the model by its full package name, matching the download cell above.
# The bare 'en' shortcut is not created by downloading a full model name and
# was removed in spaCy v3, so relying on it can fail on a fresh kernel.
nlp = spacy.load('en_core_web_lg')

In [0]:
def print_intent(params):
    """Print a one-line report for a classification result, then a divider.

    Args:
        params: Indexable sequence laid out as ``[text, intent, predicate, objects]``:
            the query string, the matched intent (falsy when nothing matched),
            the root token (only its ``.text`` attribute is read; may be
            ``None`` when no root was found), and the list of extracted objects.
    """
    text = params[0]
    intent = params[1]
    predicate = params[2]
    objects = params[3]

    if intent:
        print(text, '==>', intent, objects)
    elif predicate is None:
        # No root token to name; avoid AttributeError on ``None.text``.
        print('No intent for "' + text + '", no root found.')
    else:
        print('No intent for "' + text + '", found "' + predicate.text + '" as the root.')

    print('-----------------')

In [0]:
# Sample queries as (query text, expected intent label) pairs.
# NOTE(review): the expected labels here ('direct', 'star', 'costar', 'best',
# 'worst') use a different vocabulary from the taxonomy keys in the classifier
# cells ('directed', 'cast'), and the driver loops below only read the query
# text (index 0) -- the labels are not checked against the predictions.
test = [
    # directing
    ("the movie was directed by george clooney","direct"),
    ("movies directed by george clooney","direct"),
    ("movies by george clooney","direct"),
    ("directed by george clooney","direct"),
    ("george clooney director","direct"),
    ("george clooney directing with brad pitt","direct"),
    ("george clooney directed brad pitt","direct"),
    ("george clooney directed by brad pitt","direct"),
    # starring
    ("The movie stars george clooney","star"),
    ("starring george clooney","star"),
    ("george clooney starred","star"),
    ("george clooney star","star"),
    # co-starring
    ("costarring george clooney and brad pitt","costar"),
    ("george clooney and brad pitt","costar"),
    ("george clooney with brad pitt","costar"),
    # best / most popular
    ("best george clooney movie","best"),
    ("best movie of 2012","best"),
    ("Tom Hanks' best movie","best"),
    ("popular julia roberts","best"),
    # worst
    ("2012 worst","worst"),
    ("panned julia roberts","worst"),
    ("bad julia roberts","worst")
]

## Let's get the root!

We use the **root** of the sentence/clause as the predicate in the subject-predicate-object structure.

In [4]:
def intent_from_root(text):
    """Classify a query by looking up the lemma of its dependency-parse root.

    Args:
        text: Query string, parsed with the module-level ``nlp`` pipeline.

    Returns:
        list: ``[text, intent, predicate, objects]`` where ``intent`` is a key
        of the taxonomy below (``None`` when the root lemma is not listed),
        ``predicate`` is the token whose ``dep_`` is ``"ROOT"`` (``None`` when
        the parse contains no such token, e.g. a doc with no tokens), and
        ``objects`` is the text of every entity in ``doc.ents``.
    """
    doc = nlp(text)
    intent = None
    predicate = None
    objects = [ent.text for ent in doc.ents]

    # Intent name -> root lemmas that signal that intent.
    predicate_taxonomy = {
        'directed':['direct','director','by'],
        'cast':['star','feature','cameo','costarring']
    }

    # Get the root of the clause; if several tokens are ROOT, the last is kept.
    for tok in doc:
        if tok.dep_ == "ROOT":
            predicate = tok

    # See if the root lemma is in the taxonomy. Skipped when no root was found,
    # which would otherwise raise AttributeError on ``None.lemma_``.
    if predicate is not None:
        for kind, lemmas in predicate_taxonomy.items():
            if predicate.lemma_ in lemmas:
                intent = kind

    return [text, intent, predicate, objects]

# Classify every sample query by its parse root and print the outcome.
for query, _expected_label in test:
    result = intent_from_root(query)
    print_intent(result)

the movie was directed by george clooney ==> directed []
-----------------
No intent for "movies directed by george clooney", found "movies" as the root.
-----------------
No intent for "movies by george clooney", found "movies" as the root.
-----------------
directed by george clooney ==> directed []
-----------------
george clooney director ==> directed []
-----------------
No intent for "george clooney directing with brad pitt", found "clooney" as the root.
-----------------
george clooney directed brad pitt ==> directed []
-----------------
No intent for "george clooney directed by brad pitt", found "clooney" as the root.
-----------------
The movie stars george clooney ==> cast []
-----------------
starring george clooney ==> cast []
-----------------
george clooney starred ==> cast []
-----------------
george clooney star ==> cast []
-----------------
No intent for "costarring george clooney and brad pitt", found "costarring" as the root.
-----------------
No intent for "george c

In [5]:
def intent_from_root2(text):
    """Debugging variant of the root-based classifier that traces the parse.

    Prints ``text dep_ pos_`` for every token, then the chosen root and the
    extracted entities, before performing the taxonomy lookup.

    Args:
        text: Query string, parsed with the module-level ``nlp`` pipeline.

    Returns:
        list: ``[text, intent, predicate, objects]`` where ``intent`` is a key
        of the taxonomy below (``None`` when the root lemma is not listed),
        ``predicate`` is the token whose ``dep_`` is ``"ROOT"`` (``None`` when
        the parse contains no such token), and ``objects`` is the text of
        every entity in ``doc.ents``.
    """
    doc = nlp(text)
    intent = None
    predicate = None
    objects = [ent.text for ent in doc.ents]

    # Intent name -> root lemmas that signal that intent.
    predicate_taxonomy = {
        'directed':['direct','director'],
        'cast':['star','feature','cameo']
    }

    # Get the root of the clause, dumping each token's dependency label and
    # part of speech along the way; if several tokens are ROOT, the last is kept.
    for tok in doc:
        print(tok.text,tok.dep_,tok.pos_)
        if tok.dep_ == "ROOT":
            predicate = tok

    print(predicate,objects)

    # See if the root lemma is in the taxonomy. Skipped when no root was found,
    # which would otherwise raise AttributeError on ``None.lemma_``.
    if predicate is not None:
        for kind, lemmas in predicate_taxonomy.items():
            if predicate.lemma_ in lemmas:
                intent = kind

    return [text, intent, predicate, objects]
            
# Re-run the sample queries through the tracing variant to inspect each parse.
for query, _expected_label in test:
    result = intent_from_root2(query)
    print_intent(result)

the det DET
movie nsubjpass NOUN
was auxpass VERB
directed ROOT VERB
by agent ADP
george compound NOUN
clooney pobj NOUN
directed []
the movie was directed by george clooney ==> directed []
-----------------
movies ROOT NOUN
directed acl VERB
by agent ADP
george compound NOUN
clooney pobj NOUN
movies []
No intent for "movies directed by george clooney", found "movies" as the root.
-----------------
movies ROOT NOUN
by prep ADP
george compound NOUN
clooney pobj NOUN
movies []
No intent for "movies by george clooney", found "movies" as the root.
-----------------
directed ROOT VERB
by agent ADP
george compound NOUN
clooney pobj NOUN
directed []
directed by george clooney ==> directed []
-----------------
george compound NOUN
clooney compound NOUN
director ROOT NOUN
director []
george clooney director ==> directed []
-----------------
george compound NOUN
clooney ROOT NOUN
directing acl VERB
with prep ADP
brad amod NOUN
pitt pobj NOUN
clooney []
No intent for "george clooney directing wit