In [2]:
import spacy
# One-time setup, run in a shell after `pip install spacy`: python -m spacy download en_core_web_sm

In [5]:
# Load the "en_core_web_sm" pipeline; the model package must already be
# downloaded (see the `spacy download` note in the import cell).
nlp = spacy.load("en_core_web_sm")

In [7]:
# Run the sample sentence through the pipeline to get a processed document
# whose tokens are inspected in the following cells.
doc = nlp('Tesla is looking at buying U.S. startup for $6 million')

In [8]:
# Show how the sentence was split: one token text per output line.
print("\n".join(token.text for token in doc))

Tesla
is
looking
at
buying
U.S.
startup
for
$
6
million


In [9]:
# For every token show its text, the integer part-of-speech ID (`pos`) and the
# matching string label (`pos_`), separated by single spaces.
for token in doc:
    print(f"{token.text} {token.pos} {token.pos_}")

Tesla 96 PROPN
is 87 AUX
looking 100 VERB
at 85 ADP
buying 100 VERB
U.S. 96 PROPN
startup 92 NOUN
for 85 ADP
$ 99 SYM
6 93 NUM
million 93 NUM


In [12]:
# Tab-separated table per token: text, POS ID, POS label, dependency ID,
# dependency label. The underscore-suffixed attributes are the string forms
# of the integer IDs next to them.
for token in doc:
    print(
        "|{}\t|{}\t|{}\t|{}\t|{}\t|".format(
            token.text, token.pos, token.pos_, token.dep, token.dep_
        )
    )

|Tesla	|96	|PROPN	|429	|nsubj	|
|is	|87	|AUX	|405	|aux	|
|looking	|100	|VERB	|8206900633647566924	|ROOT	|
|at	|85	|ADP	|443	|prep	|
|buying	|100	|VERB	|438	|pcomp	|
|U.S.	|96	|PROPN	|7037928807040764755	|compound	|
|startup	|92	|NOUN	|416	|dobj	|
|for	|85	|ADP	|443	|prep	|
|$	|99	|SYM	|446	|quantmod	|
|6	|93	|NUM	|7037928807040764755	|compound	|
|million	|93	|NUM	|439	|pobj	|


In [13]:
# Inspect the loaded pipeline: a list of (name, component object) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x1202c7770>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x1202c75f0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x117e6b140>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x1202f88d0>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x120464510>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x117e6b450>)]

In [14]:
# Only the component names of the pipeline, without the component objects.
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [15]:
# Second sample: contains a contraction ("isn't") and a run of extra spaces,
# to show how both are tokenized and tagged.
doc2 = nlp("Tesla isn't   looking into startups anymore.")

# Text, part-of-speech label and dependency label for each token.
for token in doc2:
    print(f"{token.text} {token.pos_} {token.dep_}")

Tesla PROPN nsubj
is AUX aux
n't PART neg
   SPACE dep
looking VERB ROOT
into ADP prep
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


In [16]:
# Tokens can be fetched by index; this is the part-of-speech label (string
# form) of the first token of doc2.
doc2[0].pos_

'PROPN'

In [17]:
# Dependency label (string form) of the first token of doc2.
doc2[0].dep_

'nsubj'

In [18]:
# spacy.explain() turns a label string into a human-readable description.
spacy.explain('PROPN')

'proper noun'

In [19]:
# Describe the dependency label of doc2's first token (the same token the
# preceding cells inspect); explain() maps the label string to a readable
# description. Previously this read from `doc`, which only happened to carry
# the same label for its first token.
spacy.explain(doc2[0].dep_)

'nominal subject'

In [20]:
# Lemma = base form of a word. Print the surface text of token 4 of doc2,
# then its lemma, each on its own line.
print(doc2[4].text, doc2[4].lemma_, sep="\n")

looking
look


In [21]:
# Coarse part-of-speech label first, then the detailed tag together with its
# human-readable explanation.
print(doc2[4].pos_)
print(' / '.join([doc2[4].tag_, spacy.explain(doc2[4].tag_)]))

VERB
VBG / verb, gerund or present participle


In [22]:
# Word shape: the pattern of upper-case letters, lower-case letters and
# punctuation in a token. The second line deliberately uses token 5 of the
# first document ("U.S.").
print(doc2[0].text, doc2[0].shape_, sep=': ')
print(doc[5].text, doc[5].shape_, sep=' : ')

Tesla: Xxxxx
U.S. : X.X.


In [23]:
# Boolean token flags for the first token of doc2: is_alpha, then is_stop,
# one per line.
print(doc2[0].is_alpha, doc2[0].is_stop, sep="\n")

True
False


In [24]:
# Longer sample used to demonstrate slicing a span out of a document.
# The text is assembled from adjacent string literals; the resulting string is
# unchanged, including the original spelling of the sample text.
doc3 = nlp(
    'Although commmonly attributed to John Lennon from his song "Beautiful Boy", '
    'the phrase "Life is what happens to us while we are making other plans" was written by '
    "cartoonist Allen Saunders and published in Reader's Digest in 1957, when Lennon was 17."
)

In [25]:
# Slice the document by token index: tokens 16 through 29 cover the quoted
# sentence, including its opening and closing quotation-mark tokens.
life_quote = doc3[16:30]
print(life_quote)

"Life is what happens to us while we are making other plans"


In [26]:
# Confirm what kind of object the slice produced.
type(life_quote)

spacy.tokens.span.Span

In [27]:
# Three short sentences in one document, used to demonstrate sentence
# segmentation.
doc4 = nlp(
    'This is the first sentence. '
    'This is another sentence. '
    'This is the last sentence.'
)

In [28]:
# `doc4.sents` yields the sentences of the document; print one per line.
print(*doc4.sents, sep="\n")

This is the first sentence.
This is another sentence.
This is the last sentence.


In [None]:
# Check whether token 6 of doc4 begins a sentence. The attribute is
# `is_sent_start`; the previous spelling `is_sent_Star` raises AttributeError.
# Token 6 should be the "This" that opens the second sentence (verify by
# printing doc4[6]).
doc4[6].is_sent_start