# spaCy basics

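- `token.text` – the token's verbatim text
- `.pos`, `.pos_` – coarse part-of-speech tag (integer ID / readable string)
- `.dep`, `.dep_` – syntactic dependency relation (integer ID / readable string)
- `.shape`, `.shape_` – orthographic word shape (integer ID / readable string)
- `.is_alpha` – does the token consist only of alphabetic characters?
- `.is_stop` – is the token a stop word?
- `nlp.pipeline` – the processing components the model applies to a text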

In [1]:
import spacy

In [2]:
import en_core_web_sm
# Model name breakdown: "en" = English, "core" = general-purpose pipeline,
# "web" = trained on web text, "sm" = small model size.
nlp = en_core_web_sm.load()

In [3]:
doc = "Tata is going to sell India based cars to U.S.A. for $3 million"

In [4]:
doc = nlp(doc)

### `token.text` — the verbatim text of each token (the sentence split into words and punctuation)

In [5]:
# Walk the Doc token by token and show each token's raw text.
for tok in doc:
    print(tok.text)

Tata
is
going
to
sell
India
based
cars
to
U.S.A.
for
$
3
million


### `.pos_` — returns the part-of-speech tag of each token

In [7]:
# Show every token next to its coarse-grained part-of-speech tag.
for tok in doc:
    print(f"{tok.text} {tok.pos_}")

Tata PROPN
is AUX
going VERB
to PART
sell VERB
India PROPN
based VERB
cars NOUN
to ADP
U.S.A. PROPN
for ADP
$ SYM
3 NUM
million NUM


In [None]:
# NOTE(review): this cell repeats the token / part-of-speech listing from the
# previous cell and has no saved output; it can probably be removed.
for tok in doc:
    word, pos_tag = tok.text, tok.pos_
    print(word, pos_tag)

### `.dep_` — returns the syntactic dependency relation of each token


In [9]:
# One row per token: text, part-of-speech tag, dependency label.
for tok in doc:
    word, pos_tag, dep_label = tok.text, tok.pos_, tok.dep_
    print(word, pos_tag, dep_label)

   

Tata PROPN nsubj
is AUX aux
going VERB ROOT
to PART aux
sell VERB xcomp
India PROPN dative
based VERB amod
cars NOUN dobj
to ADP prep
U.S.A. PROPN pobj
for ADP prep
$ SYM quantmod
3 NUM compound
million NUM pobj


### `nlp.pipeline` — the list of processing components (here the tagger, dependency parser and named-entity recognizer) that are applied to every text passed to `nlp`

In [11]:
# List the pipeline components as (name, component) pairs.
nlp.pipeline

[('tagger', <spacy.pipeline.pipes.Tagger at 0x15ea1c0fec8>),
 ('parser', <spacy.pipeline.pipes.DependencyParser at 0x15ea19cc528>),
 ('ner', <spacy.pipeline.pipes.EntityRecognizer at 0x15ea29360a8>)]

In [12]:
# Only the component names, without the component objects.
nlp.pipe_names

['tagger', 'parser', 'ner']

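### `.shape_` — returns the orthographic shape of each token: uppercase letters become `X`, lowercase letters `x`, digits `d`, and runs of the same character type are capped at four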

In [14]:
# Pair each token with its orthographic shape string.
for tok in doc:
    print(f"{tok.text} {tok.shape_}")

Tata Xxxx
is xx
going xxxx
to xx
sell xxxx
India Xxxxx
based xxxx
cars xxxx
to xx
U.S.A. X.X.X.
for xxx
$ $
3 d
million xxxx


### `.is_alpha` — returns a boolean: `True` if the token consists only of alphabetic characters

In [15]:
# Flag which tokens are made up purely of alphabetic characters.
for tok in doc:
    word, alphabetic = tok.text, tok.is_alpha
    print(word, alphabetic)

Tata True
is True
going True
to True
sell True
India True
based True
cars True
to True
U.S.A. False
for True
$ False
3 False
million True


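### `.is_stop` — returns a boolean: `True` if the token is a stop word

- What are stop words in NLP? They are very common words (such as "is", "to" and "for") that carry little meaning on their own and are often filtered out before further analysis.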


In [16]:
# Flag which tokens are stop words.
for tok in doc:
    print(f"{tok.text} {tok.is_stop}")

Tata False
is True
going False
to True
sell False
India False
based False
cars False
to True
U.S.A. False
for True
$ False
3 False
million False


#### Accessing individual tokens by index

In [18]:
# Parse a short sentence; note that `text` holds the parsed spaCy Doc here,
# not a plain string.
text = nlp("I am Ajay Goswami")

In [19]:
# Indexing the parsed document yields the token at that position (0 = first).
text[0]

I

In [20]:
# Part-of-speech tag of the first token.
text[0].pos_

'PRON'

In [21]:
# Third token (indices are zero-based).
text[2]

Ajay

In [22]:
# Part-of-speech tag of the third token.
text[2].pos_

'PROPN'