## NLP - spaCy pipeline

In [1]:
import spacy 

In [2]:
# Create a blank English pipeline. It can still tokenize text, but it has
# no processing components yet (nlp1.pipe_names is empty further down).
nlp1 = spacy.blank('en')

In [3]:
# Run the blank pipeline on a sample text and show one token per line.
doc = nlp1("Don't think just do it. This was said by goose.")
for tok in doc:
    print(tok.text)

Do
n't
think
just
do
it
.
This
was
said
by
goose
.


In [4]:
# A pipeline created with spacy.blank has no components, so pipe_names is an empty list.
nlp1.pipe_names

[]

In [5]:
# The blank pipeline has no component that sets sentence boundaries, so
# iterating doc.sents raises ValueError (E030). The error is the point of this
# cell, so catch it and display it instead of letting it abort a full
# "Restart & Run All" of the notebook.
try:
    for sentence in doc.sents:
        print(sentence)
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: [E030] Sentence boundaries unset. You can add the 'sentencizer' component to the pipeline with: `nlp.add_pipe('sentencizer')`. Alternatively, add the dependency parser or sentence recognizer, or set sentence boundaries by setting `doc[i].is_sent_start`.

### Adding a component to a blank pipeline

In [6]:
# Add the rule-based sentencizer so that sentence boundaries get set.
# add_pipe raises if a component with the same name already exists, so guard
# the call to keep this cell safe to re-run. Either branch evaluates to the
# component, which the notebook displays as the cell output.
nlp1.get_pipe('sentencizer') if nlp1.has_pipe('sentencizer') else nlp1.add_pipe('sentencizer')

<spacy.pipeline.sentencizer.Sentencizer at 0x1d61e036710>

In [7]:
# Now that the sentencizer component is in the pipeline, re-process the text:
# sentence boundaries are set and doc.sents can be iterated.
doc = nlp1("Don't think just do it. This was said by goose.")
for sent in doc.sents:
    print(sent.text)

Don't think just do it.
This was said by goose.


### Loading a trained spaCy pipeline (fully loaded pipeline)

In [8]:
# Load the trained small English pipeline. Unlike spacy.blank('en'), it comes
# with components already configured (the en_core_web_sm package must be installed).
nlp = spacy.load("en_core_web_sm")

In [9]:
# Process the same sample text with the loaded pipeline; `doc` now refers to
# this new result rather than the one produced by the blank pipeline.
doc = nlp("Don't think just do it. This was said by goose.")

In [12]:
# List the loaded pipeline's components as (name, component) pairs.
nlp.pipeline

[('tok2vec', <spacy.pipeline.tok2vec.Tok2Vec at 0x1d61e31b830>),
 ('tagger', <spacy.pipeline.tagger.Tagger at 0x1d61e3560f0>),
 ('parser', <spacy.pipeline.dep_parser.DependencyParser at 0x1d6200017e0>),
 ('attribute_ruler',
  <spacy.pipeline.attributeruler.AttributeRuler at 0x1d61d9a7c10>),
 ('lemmatizer', <spacy.lang.en.lemmatizer.EnglishLemmatizer at 0x1d61e4a7350>),
 ('ner', <spacy.pipeline.ner.EntityRecognizer at 0x1d620001380>)]

In [13]:
# The loaded pipeline includes a dependency parser, so doc.sents works here
# without adding a sentencizer.
for sent in doc.sents:
    print(sent.text)

Don't think just do it.
This was said by goose.


In [16]:
# One row per token: text | human-readable part-of-speech | lemma
for tok in doc:
    pos_description = spacy.explain(tok.pos_)
    print(f"{tok.text} | {pos_description} | {tok.lemma_}")

Do | auxiliary | do
n't | particle | not
think | verb | think
just | adverb | just
do | verb | do
it | pronoun | it
. | punctuation | .
This | pronoun | this
was | auxiliary | be
said | verb | say
by | adposition | by
goose | noun | goose
. | punctuation | .


### Named entity recognition

In [20]:
# Print each named entity detected in the text with its label,
# e.g. "Tesla Inc" is labelled ORG (an organisation).
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")
for ent in doc.ents:
    print(f"{ent.text} {ent.label_}")

Tesla Inc ORG
$45 billion MONEY
