In [1]:
import spacy

In [2]:
nlp=spacy.load('en_core_web_sm')

In [3]:
doc=nlp(u'Tesla is looking at buying U.S. startup for $6 million')

In [4]:
type(doc)

spacy.tokens.doc.Doc

In [5]:
for token in doc:
    print(token.text)

Tesla
is
looking
at
buying
U.S.
startup
for
$
6
million


In [12]:
for token in doc:
    print(token.text,'\t',token.pos_,'\t',token.dep_,'\t',token.pos)

Tesla 	 PROPN 	 nsubj 	 95
is 	 VERB 	 aux 	 99
looking 	 VERB 	 ROOT 	 99
at 	 ADP 	 prep 	 84
buying 	 VERB 	 pcomp 	 99
U.S. 	 PROPN 	 compound 	 95
startup 	 NOUN 	 dobj 	 91
for 	 ADP 	 prep 	 84
$ 	 SYM 	 quantmod 	 98
6 	 NUM 	 compound 	 92
million 	 NUM 	 pobj 	 92


### Spacy can interpret each and every token to give a meaning way.

In [13]:
nlp.pipeline

[('tagger', <spacy.pipeline.Tagger at 0x232794ec9e8>),
 ('parser', <spacy.pipeline.DependencyParser at 0x2327a8a8678>),
 ('ner', <spacy.pipeline.EntityRecognizer at 0x2327a8a86d0>)]

####  When we run this command, our NLP entering the text and then perform the series of operation like tagging, parsing, and describing.

In [14]:
nlp.pipe_names

['tagger', 'parser', 'ner']

### The first step in processing any text is to split it up all the components parts. That is the word and punctuation into tokens and these tokens are annoted inside the doc object contain descriptive information.

In [16]:
doc2=nlp(u"Tesla and SpaceX isn't looking into startup anymore.")

In [18]:
for token in doc2:
    print (token.text,token.pos_)

Tesla PROPN
and CCONJ
SpaceX PROPN
is VERB
n't ADV
looking VERB
into ADP
startup NOUN
anymore ADV
. PUNCT


In [19]:
doc2[0]

Tesla

In [20]:
doc2[1:5]

and SpaceX isn't

In [21]:
doc2[1].dep_

'cc'

In [22]:
doc2[2].dep_

'conj'

In [23]:
doc2[3].dep_

'aux'

## SPANS

In [24]:
doc3=nlp(u"Today i was looking good in my first day of my office. I am not happy with my work because this work doesn't related to me. i hope that i will definetely become a data scientist one day. ")

In [27]:
life_goal=doc3[32:]

In [28]:
for token in life_goal:
    print(token.text)

i
will
definetely
become
a
data
scientist
one
day
.


In [29]:
print(life_goal)

i will definetely become a data scientist one day.


In [30]:
type(life_goal)#span object of document file

spacy.tokens.span.Span

In [31]:
type(doc3)# Documented object model

spacy.tokens.doc.Doc

In [34]:
doc4=nlp(u"This is the first sentence. This is second sentence. This is the last sentence.")

In [35]:
for sentence in doc4.sents:
    print(sentence)

This is the first sentence.
This is second sentence.
This is the last sentence.


In [36]:
doc4[6].is_sent_start #it will return True if the given index is sentence starting point.

True

In [37]:
doc4[8].is_sent_start #it will return null if the given index is not sentence starting point

In [38]:
mystring='"We\'re moving to L.A !"'
print(mystring)

"We're moving to L.A !"


In [39]:
Doc=nlp(mystring)

In [40]:
for token in doc:
    print(token.text,token.pos_)

Tesla PROPN
is VERB
looking VERB
at ADP
buying VERB
U.S. PROPN
startup NOUN
for ADP
$ SYM
6 NUM
million NUM


In [41]:
Doc2=nlp(u"We're here to help! send email to our custormer executive email, customerservice@solutions.com for further queries or visit at the http://www.solutonsprovide.com")

In [42]:
for token in Doc2:
    print(token)

We
're
here
to
help
!
send
email
to
our
custormer
executive
email
,
customerservice@solutions.com
for
further
queries
or
visit
at
the
http://www.solutonsprovide.com


In [43]:
len(Doc2)

23

In [45]:
len(Doc2.vocab) #means our spacy contains this much vocabulary in total.

57852

### TOKENS CANNOT BE REASSIGNED 

In [47]:
#To find the name entities in the sentence.
doc5=nlp(u'Apple to build a Hong Kong factory for $6 million')

for t in doc5:
    print(t.text,end='  |  ')

Apple  |  to  |  build  |  a  |  Hong  |  Kong  |  factory  |  for  |  $  |  6  |  million  |  

In [48]:
#to see entity

for t1 in doc5.ents:
    print(t1)

Apple
Hong Kong
$6 million


In [49]:
doc_last=nlp(u'Today i was in metro. i was heading to the land of desire and i saw the girl standing there. she said,"hey man why you want to take that train". the indian summer skies unforgettable fire in her eyes. the goal of my intention of every intention of walking away but then i realize she gonna love me today')

In [50]:
len(doc_last)

65

In [51]:
for i in doc_last.ents:
    print(i)

Today
the indian summer
today


In [52]:
for i in doc5.ents:
    print(i)
    print('\n')
    print(i.label_)
    print('\n')
    print('\n')
    print(str(spacy.explain(i.label_)))

Apple


ORG




Companies, agencies, institutions, etc.
Hong Kong


GPE




Countries, cities, states
$6 million


MONEY




Monetary values, including unit


In [53]:
doc6=nlp(u'Autonoumous cars shifts insurance liability towards manufactures.')

In [54]:
for chink in doc6.noun_chunks:
    print(chink)

Autonoumous cars shifts insurance liability
manufactures
