In [1]:
#importing library
import spacy

In [2]:
# Load the 'en_core_web_sm' model and bind it to `nlp`; every later cell
# calls `nlp(text)` to process a string.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Apply the nlp model to the text; the resulting `doc` can be iterated
# token by token and indexed like a sequence.
doc = nlp(u'Google is looking at buying U.S. startup for $6 million')

In [4]:
# One row per token: its text, part-of-speech tag and dependency label.
for tok in doc:
    print(tok.text, tok.pos_, tok.dep_)

Google PROPN nsubj
is VERB aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.S. PROPN compound
startup NOUN dobj
for ADP prep
$ SYM quantmod
6 NUM compound
million NUM pobj


In [5]:
# Show the processing components as (name, component object) pairs.
nlp.pipeline

[('tagger', <spacy.pipeline.pipes.Tagger at 0x271698f47f0>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0x27169a52048>),
 ('ner', <spacy.pipeline.pipes.EntityRecognizer at 0x27169a520a8>)]

In [6]:
# Show only the component names, without the component objects.
nlp.pipe_names

['tagger', 'parser', 'ner']

In [7]:
# The run of extra spaces is deliberate: it shows up as its own SPACE token
# in the listing printed by the next cell.
# NOTE(review): 'statup' looks like a typo for 'startup'; left unchanged
# because the recorded output was produced from this exact text.
doc2 = nlp(u"Tesla isn't     looking for statup anymore.")

In [8]:
# Same three columns as before (text, part-of-speech tag, dependency label),
# joined with single spaces.
for tok in doc2:
    print(' '.join((tok.text, tok.pos_, tok.dep_)))

Tesla PROPN nsubj
is VERB ROOT
n't ADV neg
     SPACE 
looking VERB acomp
for ADP prep
statup NOUN pobj
anymore ADV advmod
. PUNCT punct


In [9]:
# Part-of-speech tag of the first token of `doc`.
doc[0].pos_

'PROPN'

In [10]:
# Dependency label of the first token of `doc2`.
doc2[0].dep_

'nsubj'

In [11]:
# Text of the first token of `doc2`.
doc2[0].text

'Tesla'

In [12]:
# Token 4 of doc2: surface form on the first line, lemma on the second.
print(doc2[4].text, doc2[4].lemma_, sep='\n')

looking
look


In [13]:
# Fine-grained tag of the first token (compare with the coarser `pos_` value).
doc2[0].tag_

'NNP'

In [14]:
# Word shape of the first token: the pattern of upper/lower-case characters.
doc2[0].shape_

'Xxxxx'

In [15]:
# Boolean flag: does the first token consist of alphabetic characters?
doc2[0].is_alpha

True

In [16]:
# Boolean flag: is the first token a stop word?
doc2[0].is_stop

False

In [17]:
# A longer text used to demonstrate slicing a Doc into a span.
# The trailing backslashes continue the string literal across lines.
# NOTE(review): 'commmonly' looks like a typo for 'commonly'; left unchanged
# because it is part of the processed text.
doc3 = nlp(u'Although commmonly attributed to John Lennon from his song "Beautiful Boy", \
the phrase "Life is what happens to us while we are making other plans" was written by \
cartoonist Allen Saunders and published in Reader\'s Digest in 1957, when Lennon was 17.')

In [18]:
# Slice the Doc by token index: tokens 16 through 29 cover the quoted
# sentence, including its surrounding quotation marks.
life_quote = doc3[16:30]
print(life_quote)

"Life is what happens to us while we are making other plans"


In [19]:
# A slice of a Doc is a Span object.
type(life_quote)

spacy.tokens.span.Span

In [20]:
# Three short sentences used to demonstrate sentence segmentation.
doc4 = nlp(u'This is the first sentence. This is another sentence. This is the last sentence.')

In [21]:
# Iterating over doc4.sents yields the document one sentence at a time.
for sentence in doc4.sents:
    print(sentence)

This is the first sentence.
This is another sentence.
This is the last sentence.


### Tokenization
The process of breaking up the original text into component pieces called tokens.

In [22]:
# Sample string containing quotes, a contraction, an abbreviation and
# punctuation; the apostrophe is escaped because the literal is single-quoted.
mystring = '"We\'re moving to L.A.!"'
print(mystring)

"We're moving to L.A.!"


In [23]:
# Process the sample string; note that this rebinds `doc`, replacing the
# document created at the top of the notebook.
doc = nlp(mystring)

In [28]:
# Print each token's text on its own line to show where the text was split.
for tok in doc:
    print(tok.text)

"
We
're
moving
to
L.A.
!
"


In [29]:
# Text with a hyphenated word, an e-mail address and a URL.
doc2 = nlp(u"We're here to help! Send snail-mail, email support@oursite.com or visit us at http://www.oursite.com!")

# One token per line.
for tok in doc2:
    print(tok)

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
http://www.oursite.com
!


In [30]:
# Text with a number fused to a unit ("5km") and a currency amount.
doc3 = nlp(u'A 5km NYC cab ride costs $10.30')

# One token per line.
for tok in doc3:
    print(tok)

A
5
km
NYC
cab
ride
costs
$
10.30


In [31]:
# Text with abbreviations that contain periods ("St.", "U.S.").
doc4 = nlp(u"Let's visit St. Louis in the U.S. next year.")

# One token per line.
for tok in doc4:
    print(tok)

Let
's
visit
St.
Louis
in
the
U.S.
next
year
.


In [32]:
# Number of tokens in doc4.
len(doc4)

11

In [33]:
doc5 = nlp(u'It is better to give than to receive.')

# Retrieve three tokens (indices 2, 3 and 4) as a slice of the document:
doc5[2:5]

better to give

In [34]:
# Sentence containing an organisation, a place and a monetary amount.
doc8 = nlp(u'Apple to build a Hong Kong factory for $6 million')

# Print all tokens on a single line, each followed by ' | '.
for tok in doc8:
    print(tok.text, end=' | ')

Apple | to | build | a | Hong | Kong | factory | for | $ | 6 | million | 

In [35]:
# For each named entity print its text, its label and the explanation of the
# label, each on its own line, followed by a blank-line separator.
for named_entity in doc8.ents:
    print(
        named_entity,
        named_entity.label_,
        str(spacy.explain(named_entity.label_)),
        '\n',
        sep='\n',
    )

Apple
ORG
Companies, agencies, institutions, etc.


Hong Kong
GPE
Countries, cities, states


$6 million
MONEY
Monetary values, including unit




In [37]:
# Compact variant: entity text, label and label explanation on a single line.
for named_entity in doc8.ents:
    fields = (
        named_entity.text,
        named_entity.label_,
        str(spacy.explain(named_entity.label_)),
    )
    print(' - '.join(fields))

Apple - ORG - Companies, agencies, institutions, etc.
Hong Kong - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


### Noun Chunks

In [38]:
doc9 = nlp(u"Autonomous cars shift insurance liability toward manufacturers.")

# Print the text of every noun chunk found in the sentence.
for noun_chunk in doc9.noun_chunks:
    print(noun_chunk.text)

Autonomous cars
insurance liability
manufacturers


In [39]:
from spacy import displacy


In [40]:
# Render the sentence inline in the notebook using the 'dep' style.
# The 'distance' option is passed through to displaCy; presumably it controls
# the spacing between words -- confirm against the displaCy documentation.
# Note: this rebinds `doc`.
doc = nlp(u'Apple is going to build a U.K. factory for $6 million.')
displacy.render(doc, style='dep', jupyter=True, options={'distance': 110})

In [41]:
# Render the sentence inline in the notebook using the 'ent' style.
# Note: this rebinds `doc` again.
doc = nlp(u'Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.')
displacy.render(doc, style='ent', jupyter=True)

### Stemming

In [42]:
# Import the toolkit and the Porter stemmer class.
import nltk

# Import PorterStemmer by name instead of using a wildcard import, so it is
# clear where the name comes from and the notebook namespace is not filled
# with every other public name of the module.
from nltk.stem.porter import PorterStemmer

In [43]:
# Create the Porter stemmer instance used by the loop in the following cells.
p_stemmer = PorterStemmer()

In [44]:
# Sample input: several forms of "run" plus two adverbs ending in "-ly".
words = ['run','runner','running','ran','runs','easily','fairly']

In [45]:
# Show each word next to the stem produced by the Porter stemmer.
for word in words:
    print(' --> '.join((word, p_stemmer.stem(word))))

run --> run
runner --> runner
running --> run
ran --> ran
runs --> run
easily --> easili
fairly --> fairli


In [46]:
# Snowball stemmer, created for the English language.
from nltk.stem.snowball import SnowballStemmer
s_stemmer = SnowballStemmer(language='english')

In [48]:
# Same sample as for the Porter stemmer, extended with 'fairness'.
words = ['run','runner','running','ran','runs','easily','fairly','fairness']

In [49]:
# Show each word next to the stem produced by the Snowball stemmer.
for word in words:
    print(' --> '.join((word, s_stemmer.stem(word))))

run --> run
runner --> runner
running --> run
ran --> ran
runs --> run
easily --> easili
fairly --> fair
fairness --> fair


In [50]:
# Three words that start with "gener", used as stemmer input in the next cell.
word = ['generous','generation','generate']

In [51]:
# Iterate with a distinct loop variable. Reusing the list's own name as the
# loop target would rebind `word` to its last element (a string), so running
# this cell a second time would iterate over single characters instead.
for term in word:
    print(term+' --> '+s_stemmer.stem(term))

generous --> generous
generation --> generat
generate --> generat
