# spaCy NLP

In [13]:
# Libraries
import spacy
from tqdm import tqdm
from spacy import displacy
from spacy.tokenizer import Tokenizer
from spacy.lang.en import English

## Sample Text

In [2]:
# Sample sentence reused by the later sections. It is written as adjacent
# string literals purely for readability; the resulting value is one string.
text = (
    "Before boarding your rocket to Mars on 22nd April 1923, "
    "remember to pack these items in a bag "
    "so they will be within arm’s reach on arrival."
)

## Tokenization

In [3]:
# Load spaCy's `en_core_web_lg` pipeline; its tokenizer is used in the next section.
nlp = spacy.load("en_core_web_lg")

### Default tokenizer

In [4]:
# Use the tokenizer that belongs to the loaded pipeline and apply it to the sample text.
tokenizer = nlp.tokenizer
tokens = tokenizer(text)

# Collect the tokens first, then echo each one followed by ", " on a single line.
default = list(tokens)
for tok in default:
    print(tok, end=", ")

Before, boarding, your, rocket, to, Mars, on, 22nd, April, 1923, ,, remember, to, pack, these, items, in, a, bag, so, they, will, be, within, arm, ’s, reach, on, arrival, ., 

### Blank Tokenizer with English Vocab

In [5]:
# Build a blank English pipeline only to borrow its vocabulary. It gets its own
# name so the `nlp` pipeline loaded earlier is not silently overwritten by a
# different kind of object.
blank_nlp = English()

# A Tokenizer built from just a vocab has no prefix/suffix/infix rules or
# special cases, so punctuation and clitics stay attached to their words
# (e.g. "1923,", "arm’s", "arrival.").
tokenizer = Tokenizer(blank_nlp.vocab)
tokens = tokenizer(text)

# Keep the tokens for the comparison below and echo them on one line.
blank = list(tokens)
for token in blank:
    print(token, end=", ")

Before, boarding, your, rocket, to, Mars, on, 22nd, April, 1923,, remember, to, pack, these, items, in, a, bag, so, they, will, be, within, arm’s, reach, on, arrival., 

### Comparison

In [6]:
# Show both token lists, blank tokenizer first, one per line.
# Notice the differences between both tokenizers: the default one splits off
# punctuation (",", ".") and the possessive "’s", the blank one does not.
print(blank, default, sep="\n")

[Before, boarding, your, rocket, to, Mars, on, 22nd, April, 1923,, remember, to, pack, these, items, in, a, bag, so, they, will, be, within, arm’s, reach, on, arrival.]
[Before, boarding, your, rocket, to, Mars, on, 22nd, April, 1923, ,, remember, to, pack, these, items, in, a, bag, so, they, will, be, within, arm, ’s, reach, on, arrival, .]


## Parts of Speech (POS) Tagging

In [7]:
# Run the full `en_core_web_sm` pipeline on the sample text so that every
# token carries a part-of-speech tag.
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

# One "token TAG" pair per line; `pos_` is the tag in string form.
for tok in doc:
    print(tok, tok.pos_)

Before ADP
boarding VERB
your PRON
rocket NOUN
to ADP
Mars PROPN
on ADP
22nd ADJ
April PROPN
1923 NUM
, PUNCT
remember VERB
to PART
pack VERB
these DET
items NOUN
in ADP
a DET
bag NOUN
so SCONJ
they PRON
will AUX
be AUX
within ADP
arm NOUN
’s NOUN
reach VERB
on ADP
arrival NOUN
. PUNCT


### Identify only verbs

In [8]:
# Keep only the tokens tagged as VERB and show their surface text.
print(
    "Verbs:",
    [word.text for word in doc if word.pos_ == "VERB"],
)

Verbs: ['boarding', 'remember', 'pack', 'reach']


## Lemmatization

#### Lemmatization: Reducing the different inflected forms of a word to a single base form (its lemma)

In [9]:
# `doc` was already produced from `text` by the `en_core_web_sm` pipeline in
# the POS tagging section, so it is reused here instead of loading the same
# model and parsing the same sentence a second time.
# Print each token next to its lemma (`lemma_` is the lemma in string form).
for token in doc:
    print(token, token.lemma_)

Before before
boarding board
your your
rocket rocket
to to
Mars Mars
on on
22nd 22nd
April April
1923 1923
, ,
remember remember
to to
pack pack
these these
items item
in in
a a
bag bag
so so
they they
will will
be be
within within
arm arm
’s ’
reach reach
on on
arrival arrival
. .


## Named Entity Recognition (NER)

In [10]:
# Load spaCy's `en_core_web_lg` pipeline for entity recognition, under its own name.
lg = spacy.load("en_core_web_lg")

In [11]:
# Process the sample text with the `lg` pipeline. This rebinds `doc`, which
# until now held the result of the `en_core_web_sm` pipeline.
doc = lg(text)

In [12]:
# Visualise the entities found in `doc` using displaCy's entity ("ent") style.
displacy.render(doc, style="ent")