In [2]:
import spacy

In [3]:
nlp=spacy.load('en_core_web_sm')

In [4]:
doc= nlp(u'Tesla is looking at buying U.S. startup for $6 million')

In [5]:
# Show every token with its coarse part-of-speech (POS) tag and its
# syntactic dependency label, one token per line.
for token in doc:
    print(f"{token.text} {token.pos_} {token.dep_}")

Tesla PROPN nsubj
is VERB aux
looking VERB ROOT
at ADP prep
buying VERB pcomp
U.S. PROPN compound
startup NOUN dobj
for ADP prep
$ SYM quantmod
6 NUM compound
million NUM pobj


In [6]:
nlp.pipeline

# NER = Named Entity Recognizer (the 'ner' pipeline component)

[('tagger', <spacy.pipeline.Tagger at 0x22552f152e8>),
 ('parser', <spacy.pipeline.DependencyParser at 0x22552f355c8>),
 ('ner', <spacy.pipeline.EntityRecognizer at 0x22552f35570>)]

In [7]:
nlp.pipe_names

['tagger', 'parser', 'ner']

In [8]:
doc2=nlp(u"Tesla isn't looking     into    startups anymore.")

In [9]:
for token in doc2:
    print(token,token.pos_,token.dep_)

Tesla PROPN nsubj
is VERB aux
n't ADV neg
looking VERB ROOT
     SPACE 
into ADP prep
    SPACE 
startups NOUN pobj
anymore ADV advmod
. PUNCT punct


In [10]:
doc2[0].pos_

'PROPN'

In [11]:
doc2[0].dep_

'nsubj'

In [12]:
doc3 = nlp(u'Although commmonly attributed to John Lennon from his song "Beautiful Boy", \
the phrase "Life is what happens to us while we are making other plans" was written by \
cartoonist Allen Saunders and published in Reader\'s Digest in 1957, when Lennon was 17.')

In [13]:
life_quote=doc3[16:30]
print(life_quote)

"Life is what happens to us while we are making other plans"


In [14]:
# A slice of a Doc is a Span object; check its type to confirm
type(life_quote)

spacy.tokens.span.Span

In [15]:
type(doc3)

spacy.tokens.doc.Doc

In [16]:
doc4=nlp(u"This is the first sentence. This is another sentence. This is the last sentence")

In [17]:
for sentence in doc4.sents:
    print(sentence)

This is the first sentence.
This is another sentence.
This is the last sentence


In [18]:
# Check whether the token at index 6 is the start of a sentence
doc4[6].is_sent_start


True

In [19]:
mystring='"We\'re moving to L.A.!"'

In [20]:
print(mystring)

"We're moving to L.A.!"


In [21]:
doc=nlp(mystring)

In [22]:
doc


"We're moving to L.A.!"

In [23]:
for token in doc:
    print(token)

"
We
're
moving
to
L.A.
!
"


In [24]:
doc2=nlp("we're heere to help! Send snail-mail, email support@oursite.com or visit us at http://www.oursite.com!")

In [25]:
for t in doc2:
    print(t)

we
're
heere
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
http://www.oursite.com
!


In [26]:
doc3=nlp(u"A 5 km NYC cab ride costs $10.30")

In [27]:
for t in doc3:
    print(t)

A
5
km
NYC
cab
ride
costs
$
10.30


In [28]:
doc4=nlp("Let's visit St.Louis in the U.S. next year.")

In [29]:
for t in doc4:
    print(t)

Let
's
visit
St
.
Louis
in
the
U.S.
next
year
.


In [31]:
len(doc4)

12

In [32]:
len(doc4.vocab)

57852

In [33]:
doc5=nlp("It is better to give than receive.")

In [35]:
doc5[2:5]

better to give

In [36]:
# Doc objects do not support item assignment, so this is expected to fail.
# Catch the error and display it so the demonstration does not abort a
# "Restart & Run All" execution of the notebook.
try:
    doc5[0] = "test"
except TypeError as err:
    print(f"TypeError: {err}")

TypeError: 'spacy.tokens.doc.Doc' object does not support item assignment

In [40]:
doc8=nlp('Apple to build Hong Kong factory for $6 million')

In [42]:
for token in doc8:
    print(token.text,end=' | ')

Apple | to | build | Hong | Kong | factory | for | $ | 6 | million | 

In [45]:
# List each named entity, followed by its label and spaCy's plain-English
# description of that label.
for entity in doc8.ents:
    print(entity)
    print(f"{entity.label_}     {spacy.explain(entity.label_)}")

Apple
ORG     Companies, agencies, institutions, etc.
Hong Kong
GPE     Countries, cities, states
$6 million
MONEY     Monetary values, including unit


In [48]:
doc9=nlp('Autonomous cars shift insurance liability towards manufactures.')

In [49]:
for chunk in doc9.noun_chunks:
    print(chunk)

Autonomous cars
insurance liability
manufactures


In [50]:
from spacy import displacy

In [51]:
doc=nlp("Apple is going to build a U.K factory for $6 million.")

In [55]:
displacy.render(doc,style='dep',jupyter=True, options={'distance':70})

In [56]:
doc=nlp("Over the last quater Apple sold nearly 20 thousand iPods for a profit of $6 million")

In [58]:
displacy.render(doc,style='ent',jupyter=True)

In [60]:
import nltk
from nltk.stem.porter import PorterStemmer


In [61]:
p_stemmer=PorterStemmer()

In [67]:
words=['run','running','ran','runs','easily','fairly','fairness']

In [68]:
# Porter stemmer: show each word next to the stem it is reduced to.
for word in words:
    print(f"{word} ---> {p_stemmer.stem(word)}")

run ---> run
running ---> run
ran ---> ran
runs ---> run
easily ---> easili
fairly ---> fairli
fairness ---> fair


In [69]:
from nltk.stem.snowball import SnowballStemmer

In [70]:
s_stemmer=SnowballStemmer(language='english')

In [71]:
# Snowball stemmer over the current `words` list, printed as word-->stem.
for word in words:
    print(f"{word}-->{s_stemmer.stem(word)}")

run-->run
running-->run
ran-->ran
runs-->run
easily-->easili
fairly-->fair
fairness-->fair


In [77]:
words=['generous','generation','generously','generate']

In [78]:
# Run the Snowball stemmer over the redefined `words` list, printed as
# word-->stem.
for word in words:
    print(f"{word}-->{s_stemmer.stem(word)}")

generous-->generous
generation-->generat
generously-->generous
generate-->generat


In [79]:
#Lemmatization
import spacy

nlp=spacy.load('en_core_web_sm')

In [80]:
doc1=nlp('I am a runner running in a rice becuase I love to run since I ran today')

In [88]:
# Tab-separated view of each token, its POS tag and its lemma (base form).
for token in doc1:
    print(token, token.pos_, token.lemma_, sep=' \t ')

I 	 PRON 	 -PRON-
am 	 VERB 	 be
a 	 DET 	 a
runner 	 NOUN 	 runner
running 	 VERB 	 run
in 	 ADP 	 in
a 	 DET 	 a
rice 	 NOUN 	 rice
becuase 	 NOUN 	 becuase
I 	 PRON 	 -PRON-
love 	 VERB 	 love
to 	 PART 	 to
run 	 VERB 	 run
since 	 ADP 	 since
I 	 PRON 	 -PRON-
ran 	 VERB 	 run
today 	 NOUN 	 today


In [90]:
# `lemma` (no trailing underscore) is the integer hash that identifies the
# lemma string in spaCy's string store; `lemma_` is the readable text.
# Index the last token of doc1 explicitly rather than relying on a loop
# variable left over from an earlier cell.
doc1[-1].lemma

11042482332948150395

In [91]:
def show_lemmas(text):
    """Print one line per token: text, POS tag, lemma hash and lemma string.

    Args:
        text: An iterable of token-like objects exposing ``text``, ``pos_``,
            ``lemma`` and ``lemma_`` attributes (for example a spaCy Doc).

    Returns:
        None. The four fields are written to stdout, separated by single
        spaces, one token per line.
    """
    for tok in text:
        fields = (tok.text, tok.pos_, tok.lemma, tok.lemma_)
        print(*fields)

In [95]:
doc2=nlp('I saw ten mice today!')

In [96]:
show_lemmas(doc2)

I PRON 561228191312463089 -PRON-
saw VERB 11925638236994514241 see
ten NUM 7970704286052693043 ten
mice NOUN 1384165645700560590 mouse
today NOUN 11042482332948150395 today
! PUNCT 17494803046312582752 !
