### Tokenization

In [4]:
import spacy

In [5]:
nlp = spacy.load('en_core_web_sm')

In [10]:
doc = nlp("Apple isn't looking at buying U.K. startup for $1 billion")

In [11]:
for token in doc:
    print(token.text)

Apple
is
n't
looking
at
buying
U.K.
startup
for
$
1
billion


### Part-of_Speech [POS] Tagging

In [12]:
doc

Apple isn't looking at buying U.K. startup for $1 billion

In [15]:
for token in doc:
    print(token.text, token.lemma_)

Apple Apple
is be
n't n't
looking look
at at
buying buy
U.K. U.K.
startup startup
for for
$ $
1 1
billion billion


In [26]:
for token in doc:
    print(f'{token.text:{10}} {token.lemma_:{10}} {token.pos_:{10}} {token.is_stop}')

Apple      Apple      PROPN      False
is         be         AUX        True
n't        n't        PART       True
looking    look       VERB       False
at         at         ADP        True
buying     buy        VERB       False
U.K.       U.K.       PROPN      False
startup    startup    NOUN       False
for        for        ADP        True
$          $          SYM        False
1          1          NUM        False
billion    billion    NUM        False


### Dependency Parsing

In [28]:
for chunk in doc.noun_chunks:
    print(f'{chunk.text:{10}} {chunk.root.text:{10}} {chunk.root.dep_}')

Apple      Apple      nsubj
U.K.       U.K.       dobj


### Named Entity Recognition

In [29]:
doc

Apple isn't looking at buying U.K. startup for $1 billion

In [32]:
for ent in doc.ents:
    print(ent.text, ent.label_)

Apple ORG
U.K. GPE
$1 billion MONEY


### Sentence Segmentation

In [33]:
# doc.sents

In [34]:
doc

Apple isn't looking at buying U.K. startup for $1 billion

In [36]:
for sent in doc.sents:
    print(sent)

Apple isn't looking at buying U.K. startup for $1 billion


In [41]:
doc1 = nlp("Welcome to Kaithal. Thanks for Watching. Please Like and Subscribe")

In [42]:
doc1

Welcome to Kaithal. Thanks for Watching. Please Like and Subscribe

In [43]:
for sent in doc1.sents:
    print(sent)

Welcome to Kaithal.
Thanks for Watching.
Please Like and Subscribe


In [60]:
doc1 = nlp("Welcome to .*.Kaithal.*.Thanks for Watching")

In [61]:
for sent in doc1.sents:
    print(sent)

Welcome to .*.Kaithal.*.Thanks for Watching


In [90]:
from spacy.language import Language

@Language.component("set_rulee_mine")
def set_rulee_mine(doc):
    for token in doc[:-1]:
        if token.text == '.*.':
            doc[token.i + 1].is_sent_start = True
    return doc

In [91]:
nlp.add_pipe("set_rulee_mine")
doc1 = nlp("Welcome to.*.Kaithal.*.Thanks for Watching")

In [92]:
for sent in doc1.sents:
    print(sent)

Welcome to.*.Kaithal.*.Thanks for Watching


In [93]:
for token in doc1:
    print(token.text)

Welcome
to.*.Kaithal.*.Thanks
for
Watching


### Visualization

In [94]:
from spacy import displacy

In [95]:
doc

Apple isn't looking at buying U.K. startup for $1 billion

In [99]:
displacy.render(doc, style='dep')

In [104]:
displacy.render(doc, style='dep', options={'compact': True, 'distance': 100})

In [105]:
displacy.render(doc, style='ent')