In [33]:
import spacy

# https://spacy.io/usage/linguistic-features/

In [10]:
#Tokenization

In [2]:
nlp = spacy.load('en_core_web_sm')

In [8]:
doc = nlp("Apple isn't looking at buying U.K. startup for $1 billion")

In [9]:
for token in doc:
  print(token.text)

Apple
is
n't
looking
at
buying
U.K.
startup
for
$
1
billion


In [11]:
# Part - of - Speech[POS] Tagging

In [12]:
doc

Apple isn't looking at buying U.K. startup for $1 billion

In [14]:
for token in doc:
  print(token.text,token.lemma_)

Apple Apple
is be
n't not
looking look
at at
buying buy
U.K. U.K.
startup startup
for for
$ $
1 1
billion billion


In [20]:
for token in doc:
  print(f'{token.text:{15}}  {token.lemma_:{15}} {token.pos_:{10}} {token.is_stop}')

Apple            Apple           PROPN      False
is               be              AUX        True
n't              not             PART       True
looking          look            VERB       False
at               at              ADP        True
buying           buy             VERB       False
U.K.             U.K.            PROPN      False
startup          startup         NOUN       False
for              for             ADP        True
$                $               SYM        False
1                1               NUM        False
billion          billion         NUM        False


In [21]:
# Dependency Parsing

In [24]:
for chunk in doc.noun_chunks:
  print(f'{chunk.text:{15}} {chunk.root.text:{15}} {chunk.root.dep_}')

Apple           Apple           nsubj
U.K. startup    startup         dobj


In [25]:
# Named entity Recognition

In [26]:
doc

Apple isn't looking at buying U.K. startup for $1 billion

In [29]:
for ent in doc.ents:
  print(ent.text, ent.label_)

Apple ORG


In [30]:
#### Sentence Segmentation

In [34]:
import spacy

nlp = spacy.load("en_core_web_sm")
doc = nlp("This is a sentence. This is another sentence.")
for sent in doc.sents:
    print(sent.text)

This is a sentence.
This is another sentence.


In [35]:
doc1 = nlp("Welcome to.*.KGP Talkie.*.Thanks for watching")

In [36]:
for sent in doc1.sents:
    print(sent.text)

Welcome to.*.KGP
Talkie.*.Thanks for watching


In [37]:
# Visualization

In [39]:
from spacy import displacy

In [40]:
doc = nlp("Welcome to KGP Talkie ...Thanks...Like and Subscribe")

In [41]:
doc

Welcome to KGP Talkie ...Thanks...Like and Subscribe

In [47]:
displacy.render(doc,style='dep',options=['compact':True])

SyntaxError: ignored