In [1]:
import spacy
nlp = spacy.load('en')

In [2]:
doc = nlp(u'Hello, world. Welcome to natural language processing.')

In [3]:
token = doc[0]
print(token)

for sent in doc.sents:
    print(sent)

Hello
Hello, world.
Welcome to natural language processing.


In [4]:
for token in doc:
    print('{} - {}'.format(token, token.pos_))

Hello - INTJ
, - PUNCT
world - NOUN
. - PUNCT
Welcome - VERB
to - ADP
natural - ADJ
language - NOUN
processing - NOUN
. - PUNCT


In [8]:
def tokens_to_root(token):
    token_to_r = list()
    while token.head is not token:
        token_to_r.append(token)
        token = token.head     
        token_to_r.append(token)
    return token_to_r

for token in doc:
    print('{} --> {}'.format(token, tokens_to_root(token)))

Hello --> []
, --> [,, Hello]
world --> [world, Hello]
. --> [., Hello]
Welcome --> []
to --> [to, Welcome]
natural --> [natural, processing, processing, to, to, Welcome]
language --> [language, processing, processing, to, to, Welcome]
processing --> [processing, to, to, Welcome]
. --> [., Welcome]


In [9]:
for token in doc:
    print('-> '.join(['{}-{}'.format(dependent_token, dependent_token.dep_) 
                      for dependent_token in tokens_to_root(token)]))


,-punct-> Hello-ROOT
world-npadvmod-> Hello-ROOT
.-punct-> Hello-ROOT

to-prep-> Welcome-ROOT
natural-amod-> processing-pobj-> processing-pobj-> to-prep-> to-prep-> Welcome-ROOT
language-compound-> processing-pobj-> processing-pobj-> to-prep-> to-prep-> Welcome-ROOT
processing-pobj-> to-prep-> to-prep-> Welcome-ROOT
.-punct-> Welcome-ROOT


In [14]:
doc_2 = nlp(u'I went to delhi where I met my old friend Jack from uni.')
for ent in doc_2.ents:
    print('{} - {}'.format(ent, ent.label_))

Jack - PERSON


In [16]:
print([chunk for chunk in doc_2.noun_chunks])

[I, I, my old friend, uni]


In [18]:
for token in doc_2:
    print(token, ',', token.prob)

I , -4.064180850982666
went , -8.474893569946289
to , -3.83851957321167
delhi , -19.579313278198242
where , -7.183883190155029
I , -4.064180850982666
met , -9.784490585327148
my , -5.918124675750732
old , -7.7954816818237305
friend , -8.825821876525879
Jack , -11.20296573638916
from , -6.028810501098633
uni , -19.579313278198242
. , -3.0729479789733887


In [20]:
doc = nlp(u"Apples and oranges are similar. Boots and hippos aren't.")
apples = doc[0]
oranges = doc[2]
boots = doc[6]
hippos = doc[8]
print(apples.similarity(oranges))
print(boots.similarity(hippos))

0.0
0.0


In [21]:
apples_sent, boots_sent = doc.sents
fruit = doc.vocab[u'fruit']
print(apples_sent.similarity(fruit))
print(boots_sent.similarity(fruit))

0.569403101179
0.323890751106
