In [2]:
import spacy
from spacy import displacy

# Load the 'en_core_web_sm' English pipeline and bind it to the name `nlp`.
# Calling nlp(text) runs the pipeline on the text and returns the processed document.
nlp = spacy.load('en_core_web_sm')

In [2]:
# Run a sample sentence through the pipeline to obtain a processed document.
# (Python 3 string literals are already unicode, so no u'' prefix is required.)
doc = nlp('Tesla is looking at buying U.S. startup for $6 million')

In [6]:
# One line per token: text, numeric part-of-speech id (pos),
# part-of-speech name (pos_), and syntactic dependency label (dep_).
for tok in doc:
    print(tok.text, tok.pos, tok.pos_, tok.dep_)

Tesla 96 PROPN nsubj
is 100 VERB aux
looking 100 VERB ROOT
at 85 ADP prep
buying 100 VERB pcomp
U.S. 96 PROPN compound
startup 92 NOUN dobj
for 85 ADP prep
$ 99 SYM quantmod
6 93 NUM compound
million 93 NUM pobj


In [7]:
# Show the processing pipeline as a list of (name, component) pairs.
# 'tagger' assigns part-of-speech tags, 'parser' the dependencies,
# and 'ner' is the named entity recognizer.

nlp.pipeline

[('tagger', <spacy.pipeline.pipes.Tagger at 0x7f94a8593cc0>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0x7f9468713408>),
 ('ner', <spacy.pipeline.pipes.EntityRecognizer at 0x7f9468713468>)]

In [12]:
# Sample with a contraction and a long run of spaces, to see how the tokenizer copes.
doc2 = nlp("Tesla isn't       looking into startups anymore.")

In [13]:
# The contraction is split into separate tokens and the extra spaces
# come out as a whitespace token of their own.
for tok in doc2:
    print(tok.text, tok.pos_, tok.dep_)

Tesla PROPN nsubj
is VERB aux
n't ADV neg
       SPACE 
looking VERB ROOT
into ADP prep
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


In [14]:
# Integer indexing into a Doc returns the token at that position.
doc2[0]

Tesla

In [15]:
# pos_ is the coarse-grained part-of-speech tag, as a string.
doc2[0].pos_

'PROPN'

In [16]:
# tag_ is the fine-grained part-of-speech tag, as a string.
doc2[0].tag_

'NNP'

In [17]:
# A longer passage, written as adjacent string literals that Python joins into
# one string (each piece keeps its trailing space so the words stay separated).
# NOTE(review): "commmonly" is misspelled in the sample text; it is left as-is
# here so the text fed to the pipeline is unchanged.
doc3 = nlp(
    'Although commmonly attributed to John Lennon from his song "Beautiful Boy", '
    'the phrase "Life is what happens to us while we are making other plans" was written by '
    "cartoonist Allen Saunders and published in Reader's Digest in 1957, when Lennon was 17."
)

In [18]:
# Slice doc3 by token index to get a Span: tokens 16 through 29
# cover the quoted phrase, including both quotation marks.

life_quote = doc3[16:30]
print(life_quote)

"Life is what happens to us while we are making other plans"


In [19]:
# A slice of a Doc is a Span object, not a plain string or list.
type(life_quote)

spacy.tokens.span.Span

In [20]:
# The object returned by nlp(...) is a Doc.
type(doc3)

spacy.tokens.doc.Doc

In [21]:
# Three short sentences (the last one has no closing period) for sentence segmentation.
doc4 = nlp("This is the first sentence. This is another sentence. This is the last sentence")

In [23]:
# Doc.sents yields one span per detected sentence.
for sent in doc4.sents:
    print(sent)

This is the first sentence.
This is another sentence.
This is the last sentence


In [24]:
# Token 6 is the first word of the second sentence.
doc4[6]

This

In [26]:
# Sentence structure: is_sent_start tells whether a token begins a sentence.

doc4[6].is_sent_start

True

In [27]:
# Build a string whose content begins and ends with a double-quote character.
mystring = "\"We're moving to L.A.!\""
print(mystring)

"We're moving to L.A.!"


In [28]:
# Tokenize the quoted string and print the tokens on one line,
# each followed by ' | ' so the token boundaries are visible.
doc = nlp(mystring)

for tok in doc:
    print(tok.text, end=' | ')

" | We | 're | moving | to | L.A. | ! | " | 

In [29]:
# Punctuation is split off into separate tokens, but the e-mail address
# and the URL each come through as a single token.
doc2 = nlp("We're here to help! Send snail-mail, email support@oursite.com or visit us at http://www.oursite.com!")

for tok in doc2:
    print(tok)

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
http://www.oursite.com
!


<font color=green>Note that the exclamation points, the comma, and the hyphen in 'snail-mail' are each assigned their own token, yet the email address and the website URL are each kept intact as a single token.</font>

In [31]:
# Units and currency: the number is separated from 'km' and from '$',
# while the decimal amount stays in one piece.
doc3 = nlp('A 5km NYC cab ride costs $10.30')

for tok in doc3:
    print(tok)

A
5
km
NYC
cab
ride
costs
$
10.30


In [33]:
# len() of a Doc is its number of tokens.
len(doc3)

9

In [37]:
# Number of entries currently held in the vocabulary object attached to this Doc.
len(doc.vocab)

552

In [38]:
# Named entities: echo the tokens first, then list every entity span
# with its label and spaCy's description of that label.
doc8 = nlp('Apple to build a Hong Kong factory for $6 million')

for tok in doc8:
    print(tok.text, end=' | ')

# end the token line and draw a divider
print('\n----')

for entity in doc8.ents:
    # print() converts each argument with str(), so a missing explanation
    # (None) is shown as the text 'None'.
    print(entity.text, entity.label_, spacy.explain(entity.label_), sep=' - ')

Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | million | 
----
Apple - ORG - Companies, agencies, institutions, etc.
Hong Kong - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [40]:
# Noun chunks are base noun phrases: a noun together with the words describing it.
doc9 = nlp("Autonomous cars shift insurance liability toward manufacturers.")

for noun_phrase in doc9.noun_chunks:
    print(noun_phrase.text)

Autonomous cars
insurance liability
manufacturers


In [41]:
# Noun chunks again, this time with a multi-word phrase ("higher insurance rates").
doc10 = nlp("Red cars do not carry higher insurance rates.")

for noun_phrase in doc10.noun_chunks:
    print(noun_phrase.text)

Red cars
higher insurance rates


In [42]:
# A long run of modifiers still ends up in a single noun chunk.
doc11 = nlp("He was a one-eyed, one-horned, flying, purple people-eater.")

for noun_phrase in doc11.noun_chunks:
    print(noun_phrase.text)

He
a one-eyed, one-horned, flying, purple people-eater


In [8]:
# Visualize the dependency parse inline with displaCy.
# `displacy` is imported in the notebook's import cell at the top, so this cell
# no longer carries its own import and runs in order on a fresh kernel.
# options['distance'] sets the spacing between words in the rendered diagram;
# jupyter=True asks displaCy to render straight into the notebook output.
doc = nlp('Apple is going to build a U.K. factory for $6 million.')
displacy.render(doc, style='dep', jupyter=True, options={'distance': 75})

In [9]:
# Entity visualizer: style='ent' renders the text with its named entities marked up.
doc = nlp('Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')
displacy.render(doc, style='ent', jupyter=True)

In [None]:
# Outside of Jupyter: displacy.serve() starts a web server that serves the
# visualization (the captured output shows it listening on http://0.0.0.0:5000).
# NOTE(review): the call appears to keep running until the server is stopped, so
# in a notebook it will hold the kernel; interrupt the kernel to continue.

doc = nlp(u"This is a sentence.")

displacy.serve(doc, style='dep')

  "__main__", mod_spec)



Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

