In [6]:
#!pip install -U spacy

In [8]:
#!pip install -U spacy-lookups-data

In [9]:
#!python -m spacy download en_core_web_sm

In [10]:
import spacy

In [11]:
nlp = spacy.load('en_core_web_sm')

In [16]:
doc = nlp("Apple isn't looking at buying U.K. startup for $1 billion")

In [17]:
for token in doc:
    print(token.text)

Apple
is
n't
looking
at
buying
U.K.
startup
for
$
1
billion


### Part of speech[POS] tagging

In [18]:
doc

Apple isn't looking at buying U.K. startup for $1 billion

In [23]:
for token in doc:
    print(token.text, token.lemma_)

Apple Apple
is be
n't not
looking look
at at
buying buy
U.K. U.K.
startup startup
for for
$ $
1 1
billion billion


In [26]:
for token in doc:
    print(f'{token.text:{15}} {token.lemma_:{15}} {token.pos_:{15}}')

Apple           Apple           PROPN          
is              be              AUX            
n't             not             PART           
looking         look            VERB           
at              at              ADP            
buying          buy             VERB           
U.K.            U.K.            PROPN          
startup         startup         NOUN           
for             for             ADP            
$               $               SYM            
1               1               NUM            
billion         billion         NUM            


In [28]:
for token in doc:
    print(f'{token.text:{15}} {token.lemma_:{15}} {token.pos_:{15}} {token.is_stop}')

Apple           Apple           PROPN           False
is              be              AUX             True
n't             not             PART            True
looking         look            VERB            False
at              at              ADP             True
buying          buy             VERB            False
U.K.            U.K.            PROPN           False
startup         startup         NOUN            False
for             for             ADP             True
$               $               SYM             False
1               1               NUM             False
billion         billion         NUM             False


### Dependency parsing

In [31]:
for chunk in doc.noun_chunks:
    print(f'{chunk.text:{30}} {chunk.root.text:{15}} {chunk.root.dep_}')

Apple                          Apple           nsubj
U.K. startup                   startup         dobj


### Named Entity Recognition

In [32]:
doc

Apple isn't looking at buying U.K. startup for $1 billion

In [39]:
for ent in doc.ents:
    print(f'{ent.text:{15}} {ent.label_:{15}}')

Apple           ORG            
U.K.            GPE            
$1 billion      MONEY          


### Sentence Segmentation

In [47]:
for sen in doc.sents:
    print(sen)

Apple isn't looking at buying U.K. startup for $1 billion


In [58]:
doc2 = nlp("Welcome to the U.K. lecture. Please focus on what you can grasp")

In [59]:
for sent in doc2.sents:
    print(sent)
    
# Segmentation done on the basis of .,?,!

Welcome to the U.K. lecture.
Please focus on what you can grasp


In [81]:
def set_rule(doc):
    for token in doc[:-1]:
        if token.text == '...':
            doc[token.i + 1].is_sent_start = True
    return doc

In [86]:
nlp.remove_pipe('set_rule')

('set_rule', <function __main__.set_rule(doc)>)

In [87]:
nlp.add_pipe(set_rule, before = 'parser')
doc3 = nlp("Welcome to...the U.K. lecture...Please focus on what you can grasp")

In [88]:
for sent in doc3.sents:
    print(sent)

Welcome to...
the U.K. lecture...
Please focus on what you can grasp


In [89]:
for token in doc3:
    print(token.text)

Welcome
to
...
the
U.K.
lecture
...
Please
focus
on
what
you
can
grasp


### Visualization

In [91]:
from spacy import displacy

In [92]:
doc

Apple isn't looking at buying U.K. startup for $1 billion

In [95]:
displacy.render(doc3, style='dep')

In [101]:
displacy.render(doc, style='dep', options={'compact':True, 'distance':100})

In [102]:
displacy.render(doc, style='ent')