# Getting the dependency parse

In [1]:
# Example sentence; the following cells run it through the spaCy pipeline.
sent = 'I need a ticket to Los Angeles on May 8th.'

In [2]:
import spacy

# Load the en_core_web_sm pipeline and run it over the example sentence.
nlp = spacy.load("en_core_web_sm")
doc = nlp(sent)

# One tab-separated row per token:
#   text, POS tag, dependency label, description of that label, head text.
# spacy.explain() yields None for a label it has no description for,
# which is why the ROOT row shows "None".
row_format = '%s\t%s\t%s\t%s\t%s'
for token in doc:
    fields = (
        token.text,
        token.pos_,
        token.dep_,
        spacy.explain(token.dep_),
        token.head.text,
    )
    print(row_format % fields)

I	PRON	nsubj	nominal subject	need
need	VERB	ROOT	None	need
a	DET	det	determiner	ticket
ticket	NOUN	dobj	direct object	need
to	ADP	prep	prepositional modifier	ticket
Los	PROPN	compound	compound	Angeles
Angeles	PROPN	pobj	object of preposition	to
on	ADP	prep	prepositional modifier	need
May	PROPN	compound	compound	8th
8th	NOUN	pobj	object of preposition	on
.	PUNCT	punct	punctuation	need


In [3]:
# Parse the example sentence again and list only the tokens that were
# assigned an entity type, together with that type's label.
doc = nlp(sent)

for token in doc:
    # ent_type is 0 for tokens without an entity type; skip those.
    if token.ent_type == 0:
        continue
    print(token.text, token.ent_type_)

Los GPE
Angeles GPE
May DATE
8th DATE


In [4]:
# Second example sentence, split across two adjacent literals for readability
# (the pieces are concatenated into a single-line string).
ner_s = (
    'Apple investors urged to vote against a nearly '
    '$100 million pay package for CEO Tim Cook.'
)

doc = nlp(ner_s)

# Print every token that carries an entity type, with the type's label.
for token in doc:
    if token.ent_type == 0:
        continue
    print(token.text, token.ent_type_)

Apple ORG
nearly MONEY
$ MONEY
100 MONEY
million MONEY
Tim PERSON
Cook PERSON


# Extracting noun chunks

In [5]:
# Print the text of each noun chunk found in the current `doc`.
for chunk in doc.noun_chunks:
    print(chunk.text)

Apple investors
a nearly $100 million pay package
CEO
Tim Cook


# Extracting subjects, predicates, and objects of the sentence

In [6]:
sentence = 'I established my own workshop in 2018 before I went to Japan.'
doc = nlp(sentence)

# Same table as before, with one extra column: the index of each token's head.
# That index is what the later cells use to find the dependents of a verb.
row_format = '%s\t%s\t%s\t%s\t%s\t%d'
for token in doc:
    head = token.head
    fields = (
        token.text,
        token.pos_,
        token.dep_,
        spacy.explain(token.dep_),
        head.text,
        head.i,
    )
    print(row_format % fields)

I	PRON	nsubj	nominal subject	established	1
established	VERB	ROOT	None	established	1
my	PRON	poss	possession modifier	workshop	4
own	ADJ	amod	adjectival modifier	workshop	4
workshop	NOUN	dobj	direct object	established	1
in	ADP	prep	prepositional modifier	established	1
2018	NUM	pobj	object of preposition	in	5
before	SCONJ	mark	marker	went	9
I	PRON	nsubj	nominal subject	went	9
went	VERB	advcl	adverbial clause modifier	established	1
to	ADP	prep	prepositional modifier	went	9
Japan	PROPN	pobj	object of preposition	to	10
.	PUNCT	punct	punctuation	established	1


In [7]:
# Show, for every token, the list of tokens in its subtree
# (materialised from the token.subtree iterator so it prints as a list).
for token in doc:
    print(token, list(token.subtree))

I [I]
established [I, established, my, own, workshop, in, 2018, before, I, went, to, Japan, .]
my [my]
own [own]
workshop [my, own, workshop]
in [in, 2018]
2018 [2018]
before [before]
I [I]
went [before, I, went, to, Japan]
to [to, Japan]
Japan [Japan]
. [.]


In [8]:
# Collect (position, token) pairs for every token tagged as a VERB.
verb_idxs = []
for position, token in enumerate(doc):
    if token.pos_ == 'VERB':
        verb_idxs.append((position, token))
print(verb_idxs)

[(1, established), (9, went)]


In [9]:
def get_phrase(doc, head_idx, tag):
    """Return the phrase headed by the first matching dependent of a token.

    Scans ``doc`` in order for the first token whose head has index
    ``head_idx`` and whose dependency label contains ``tag`` as a substring
    (so ``'obj'`` matches ``dobj`` as well as ``pobj``, and ``'subj'``
    matches ``nsubj``).

    Args:
        doc: Indexable, sliceable sequence of tokens exposing ``dep_``,
            ``head.i``, ``i`` and ``subtree``.
        head_idx: Index of the head token whose dependents are searched.
        tag: Substring to look for in each token's dependency label.

    Returns:
        The slice of ``doc`` running from the first to the last token of the
        matching token's subtree, or ``None`` when no token matches.
    """
    match = next(
        (tok for tok in doc if tag in tok.dep_ and tok.head.i == head_idx),
        None,
    )
    if match is None:
        return None

    # The phrase is bounded by the first and last tokens of the subtree.
    members = list(match.subtree)
    first, last = members[0], members[-1]
    return doc[first.i:last.i + 1]

In [10]:
doc = nlp(sentence)

# For each verb found earlier, print its subject phrase, the verb itself and
# its object phrase. A phrase prints as None when the verb has no direct
# dependent of that kind (e.g. "Japan" hangs off the preposition "to",
# not off "went").
for verb_position, _verb in verb_idxs:
    print('subject:', get_phrase(doc, verb_position, 'subj'))
    print('predicate:', doc[verb_position])
    print('object:', get_phrase(doc, verb_position, 'obj'))

subject: I
predicate: established
object: my own workshop
subject: I
predicate: went
object: None
