In [1]:
# import the spaCy library and load the 'en_core_web_sm' model as `nlp`
import spacy
nlp = spacy.load('en_core_web_sm')

In [6]:
# passing text through the nlp model creates a processed text object
# the u prefix marks a Unicode string literal (optional in Python 3, where every str is Unicode)
doc = nlp(u"Tesla is looking at buying U.S. startup for $6 million. It couldn't be real.")

In [7]:
# a doc can be iterated token by token
# text: the token's text, pos: integer ID of the part-of-speech tag,
# pos_: the same tag as a readable string, dep_: syntactic dependency label
for token in doc:
    print("{} {} {} {}".format(token.text, token.pos, token.pos_, token.dep_))

Tesla 96 PROPN nsubj
is 87 AUX aux
looking 100 VERB ROOT
at 85 ADP prep
buying 100 VERB pcomp
U.S. 96 PROPN compound
startup 92 NOUN dobj
for 85 ADP prep
$ 99 SYM quantmod
6 93 NUM compound
million 93 NUM pobj
. 97 PUNCT punct
It 95 PRON nsubj
could 100 VERB aux
n't 94 PART neg
be 87 AUX ROOT
real 84 ADJ acomp
. 97 PUNCT punct


In [8]:
# list the (name, component) pairs of the pipeline that the nlp model applies to a piece of text
nlp.pipeline

[('tagger', <spacy.pipeline.pipes.Tagger at 0x1a310136d30>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0x1a310025b80>),
 ('ner', <spacy.pipeline.pipes.EntityRecognizer at 0x1a310025b20>)]

In [11]:
# a single token can be picked out by index and inspected on its own
doc[2], doc[2].pos_

(looking, 'VERB')

In [12]:
# sample text for the span examples; adjacent string literals are joined at
# compile time, so nlp receives one single-line string
doc2 = nlp(
    u'Although commmonly attributed to John Lennon from his song "Beautiful Boy", '
    u'the phrase "Life is what happens to us while we are making other plans" was written by '
    u'cartoonist Allen Saunders and published in Reader\'s Digest in 1957, when Lennon was 17.'
)

In [14]:
# taking a span from a doc by slicing a range of token positions
life_quote = doc2[16:30]
print(life_quote)

"Life is what happens to us while we are making other plans"


In [15]:
# spaCy distinguishes the two: the full text is a Doc, a slice taken from it is a Span
type(doc2), type(life_quote)

(spacy.tokens.doc.Doc, spacy.tokens.span.Span)

In [17]:
# the sents attribute separates the doc into its sentences
doc3 = nlp(u"This is the first sentence. This is the second! This is the last.")
for sentence in doc3.sents:
    print(sentence)

This is the first sentence.
This is the second!
This is the last.


In [20]:
# is_sent_start checks whether a token is the start of a sentence
doc3[6].is_sent_start

True

In [25]:
# spaCy's tokenizer copes with tricky text: quotes, contractions, abbreviations, URLs and hyphens
mystring = '"We\'re moving to L.A.!". Our website is www.website.com. Visit us very-fast!'
doc4 = nlp(mystring)
for token in doc4:
    print(token)

"
We
're
moving
to
L.A.
!
"
.
Our
website
is
www.website.com
.
Visit
us
very
-
fast
!


In [26]:
# number of tokens in the whole doc (all of its sentences together)
len(doc4)

20

In [27]:
# printing all the tokens on one line is possible by overriding print's line ending
# NOTE(review): 'miillion' looks like a typo for 'million'; the entity listing recorded
# for doc5 shows the MONEY entity as just '$6' — possibly a consequence, confirm
doc5 = nlp(u'Apple to build a Hong Kong factory for $6 miillion.')
for token in doc5:
    print(token, end=' | ')

Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | miillion | . | 

In [32]:
# walk the named entities found in doc5, printing for each one its text,
# its label, and spaCy's explanation of that label
for entity in doc5.ents:
    entity_label = entity.label_
    print(entity)
    print(entity_label)
    print(spacy.explain(entity_label))  # print() already stringifies its argument
    print('\n')  # newline character plus print's own line ending: two blank lines

Apple
ORG
Companies, agencies, institutions, etc.


Hong Kong
GPE
Countries, cities, states


$6
MONEY
Monetary values, including unit




In [33]:
# noun_chunks yields the noun phrases found in a text
doc6 = nlp(u'Autonomous cars shift insurance liability toward manufacturers.')
for chunk in doc6.noun_chunks:
    print(chunk)

Autonomous cars
insurance liability
manufacturers


### visualization

In [34]:
from spacy import displacy

In [38]:
# visualizing the syntactic dependencies of a sentence (style='dep') inside the notebook
# presumably the 'distance' option sets the spacing between words — see the displaCy options docs
doc7 = nlp(u'Apple is going to build a U.K. factory for $6 million.')
displacy.render(doc7, style='dep', jupyter=True, options={'distance':80})

In [40]:
# visualizing the named entities of a sentence (style='ent') by highlighting them in the text
doc8 = nlp(u'Over the last quarter, Apple sold nearly 20 thousands iPods for a profit of $6 million.')
displacy.render(doc8, style='ent', jupyter=True)

In [None]:
# serving the visualization from a web server instead of rendering it inline
displacy.serve(doc8, style='dep')
# then open 'http://127.0.0.1:5000/' in the web browser
# NOTE(review): the recorded output reports the server on 0.0.0.0:5000, and the cell has
# no execution count — presumably the call blocks until the kernel is interrupted; confirm


Using the 'dep' visualizer
Serving on http://0.0.0.0:5000 ...

