In [1]:
import spacy

In [2]:
# Load the `en_core_web_sm` model package; `nlp` is the callable used below
# to turn raw text into Doc objects.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Build a Doc by running the loaded pipeline over a plain string
# (the u'' prefix is unnecessary on Python 3, where every str is unicode).
doc = nlp("Tesla is looking to buy U.S. startup for $6 million")

In [4]:
# Iterating over a Doc yields its tokens in order; print each token's text.
# Note that "U.S." stays a single token while "$" and "6" are split apart.
for token in doc:
    print(token.text)

Tesla
is
looking
to
buy
U.S.
startup
for
$
6
million


In [5]:
# Without the trailing underscore, token.pos prints as an integer ID
# rather than a readable part-of-speech name.
for token in doc:
    print(token.text, token.pos)

Tesla 95
is 99
looking 99
to 93
buy 99
U.S. 95
startup 91
for 84
$ 98
6 92
million 92


In [6]:
# To read the part of speech (POS) as text, use token.pos_ (with underscore).
# Columns printed: token text, integer POS ID, POS name, dependency label.
for token in doc:
    print(token.text, token.pos, token.pos_, token.dep_)

Tesla 95 PROPN nsubj
is 99 VERB aux
looking 99 VERB ROOT
to 93 PART aux
buy 99 VERB xcomp
U.S. 95 PROPN compound
startup 91 NOUN dobj
for 84 ADP prep
$ 98 SYM quantmod
6 92 NUM compound
million 92 NUM pobj


In [7]:
# The (name, component) pairs that make up the loaded pipeline.
nlp.pipeline

[('tagger', <spacy.pipeline.Tagger at 0x119b86b50>),
 ('parser', <spacy.pipeline.DependencyParser at 0x11bd87a70>),
 ('ner', <spacy.pipeline.EntityRecognizer at 0x11bd9e050>)]

In [8]:
# Just the component names, without the component objects.
nlp.pipe_names

['tagger', 'parser', 'ner']

In [9]:
# A second Doc containing a contraction, to see how it is tokenized.
doc2 = nlp("Tesla isn't looking into startups anymore.")

In [10]:
# Print text, POS ID, POS name and dependency label for every token.
# The contraction "isn't" comes out as two tokens: "is" and "n't".
for token in doc2:
    print(token.text, token.pos, token.pos_, token.dep_)

Tesla 95 PROPN nsubj
is 99 VERB aux
n't 85 ADV neg
looking 99 VERB ROOT
into 84 ADP prep
startups 91 NOUN pobj
anymore 85 ADV advmod
. 96 PUNCT punct


In [11]:
# Same sentence, but with a run of extra spaces after "isn't".
doc2 = nlp("Tesla isn't    looking into startups anymore.")

In [12]:
# Handling of whitespace in spaCy: the run of extra spaces shows up as its
# own token, tagged SPACE and printed with an empty dependency label.
for token in doc2:
    print(token.text, token.pos, token.pos_, token.dep_)

Tesla 95 PROPN nsubj
is 99 VERB aux
n't 85 ADV neg
    102 SPACE 
looking 99 VERB ROOT
into 84 ADP prep
startups 91 NOUN pobj
anymore 85 ADV advmod
. 96 PUNCT punct


In [13]:
# Use indexing to grab the tokens we want: a Doc can be indexed by token
# position, and index 0 is the first token.
doc2[0]

Tesla

In [14]:
# POS name of the first token. It is displayed with quotes because the cell
# shows the string's repr instead of printing it.
doc2[0].pos_

'PROPN'

In [15]:
# Rebuild doc2 from the sentence without the extra spaces, for the
# per-token attribute examples that follow.
doc2 = nlp("Tesla isn't looking into startups anymore.")

In [16]:
# .text: the token's text as a string.
print(doc2[0].text)

Tesla


In [19]:
# .lemma_: the base form of the token; for "Tesla" this prints lower-case "tesla".
print(doc2[0].lemma_)

tesla


In [18]:
# .pos_: the coarse-grained part-of-speech name (PROPN for "Tesla").
print(doc2[0].pos_)

PROPN


In [20]:
# .tag_: the fine-grained part-of-speech tag (NNP here, versus the coarse PROPN).
print(doc2[0].tag_)

NNP


In [22]:
# .shape_: the word shape, i.e. its capitalisation pattern ("Tesla" -> "Xxxxx").
print(doc2[0].shape_)

Xxxxx


In [23]:
# .is_alpha: whether the token consists of alphabetic characters.
print(doc2[0].is_alpha)

True


In [24]:
# .is_stop: whether the token is a stop word; "Tesla" is not.
print(doc2[0].is_stop)

False


In [25]:
# Span of a document: a longer Doc to take a slice of tokens from.
# The adjacent string literals below are joined by Python into one string.
doc3 = nlp(
    'Although commmonly attributed to John Lennon from his song "Beautiful Boy", '
    'the phrase "Life is what happens to us while we are making other plans" was written by '
    "cartoonist Allen Saunders and published in Reader's Digest in 1957, when Lennon was 17."
)

In [26]:
# Slice the Doc by token index: tokens 16 through 29 cover the quoted
# phrase, including its opening and closing quotation marks.
life_quote = doc3[16:30]

In [27]:
# Printing the slice shows its text.
print(life_quote)

"Life is what happens to us while we are making other plans"


In [28]:
# Sentences: a Doc made of three short sentences, used to show
# sentence segmentation.
doc4 = nlp("This is the first sentence. This is another sentence. This is the last sentence.")

In [29]:
# doc4.sents yields the sentences found in the Doc, one at a time.
for sentence in doc4.sents:
    print(sentence)

This is the first sentence.
This is another sentence.
This is the last sentence.


In [30]:
# Token 6 is the "This" that opens the second sentence.
doc4[6].text

'This'

In [31]:
# is_sent_start is True for a token that begins a sentence.
doc4[6].is_sent_start

True

In [32]:
# Token 7 is the next word in the second sentence.
doc4[7].text

'is'

In [33]:
# Token 8 is "another", which sits in the middle of the second sentence.
doc4[8].text

'another'

In [34]:
# For a token that does not begin a sentence, this cell displays no output.
doc4[8].is_sent_start

In [None]:
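# No output is shown: doc4[8].is_sent_start evaluated to None for "another",
# which is not the first token of its sentence, and Jupyter displays nothing
# when a cell's last expression is None.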