# spaCy NLP

In [1]:
# Libraries
import spacy
from tqdm import tqdm
from spacy import displacy
from spacy.tokenizer import Tokenizer
from spacy.lang.en import English

## Sample Text

In [2]:
# Sample lyrics, one sentence per literal (adjacent literals are concatenated).
# The double space in "I'm  feeling" and the missing final period are part of
# the sample and show up in the tokenizer output.
text = (
    "We're no strangers to love. "
    "You know the rules and so do I. "
    "A full commitment's what I'm thinking of. "
    "You wouldn't get this from any other guy. "
    "I just wanna tell you how I'm  feeling. "
    "Gotta make you understand. "
    "Never gonna give you up. "
    "Never gonna let you down. "
    "Never gonna run around and desert you. "
    "Never gonna make you cry. "
    "Never gonna say goodbye. "
    "Never gonna tell a lie and hurt you"
)

## Tokenization

In [3]:
# Load the `en_core_web_lg` English pipeline; the tokenization section below
# uses its tokenizer (`nlp.tokenizer`).
nlp = spacy.load('en_core_web_lg')

In [4]:
# Drop every '.' so periods do not appear in the tokenizer comparison
text2 = "".join(text.split("."))

### Default tokenizer

In [5]:
# Tokenizer that ships with the loaded pipeline; it splits contractions,
# e.g. "We're" -> "We", "'re"
tokenizer = nlp.tokenizer
tokens = tokenizer(text2)

# Keep the tokens so they can be compared with the blank tokenizer later
default = list(tokens)

for token in default:
    print(token, end=', ')

We, 're, no, strangers, to, love, You, know, the, rules, and, so, do, I, A, full, commitment, 's, what, I, 'm, thinking, of, You, would, n't, get, this, from, any, other, guy, I, just, wanna, tell, you, how, I, 'm,  , feeling, Got, ta, make, you, understand, Never, gon, na, give, you, up, Never, gon, na, let, you, down, Never, gon, na, run, around, and, desert, you, Never, gon, na, make, you, cry, Never, gon, na, say, goodbye, Never, gon, na, tell, a, lie, and, hurt, you, 

### Blank Tokenizer with English Vocab

In [6]:
# Blank tokenizer with English vocab.
# A `Tokenizer` built from a vocab alone has no prefix/suffix/infix or
# special-case rules, so it splits on whitespace only and contractions such as
# "We're" stay in one piece.
# The blank pipeline gets its own name so the model already loaded into `nlp`
# is not overwritten by a different kind of object.
blank_nlp = English()
tokenizer = Tokenizer(blank_nlp.vocab)
tokens = tokenizer(text2)

# Keep the tokens for the comparison with the default tokenizer
blank = list(tokens)

for token in blank:
    print(token, end=', ')

We're, no, strangers, to, love, You, know, the, rules, and, so, do, I, A, full, commitment's, what, I'm, thinking, of, You, wouldn't, get, this, from, any, other, guy, I, just, wanna, tell, you, how, I'm,  , feeling, Gotta, make, you, understand, Never, gonna, give, you, up, Never, gonna, let, you, down, Never, gonna, run, around, and, desert, you, Never, gonna, make, you, cry, Never, gonna, say, goodbye, Never, gonna, tell, a, lie, and, hurt, you, 

### Comparison

In [7]:
# Show both token lists, one per line: default tokenizer first, blank second
print(default, blank, sep='\n')

## Notice the differences between the two tokenizers: the default one splits
## contractions ("We're" -> "We", "'re"; "gonna" -> "gon", "na"), while the
## blank one only splits on whitespace.

[We, 're, no, strangers, to, love, You, know, the, rules, and, so, do, I, A, full, commitment, 's, what, I, 'm, thinking, of, You, would, n't, get, this, from, any, other, guy, I, just, wanna, tell, you, how, I, 'm,  , feeling, Got, ta, make, you, understand, Never, gon, na, give, you, up, Never, gon, na, let, you, down, Never, gon, na, run, around, and, desert, you, Never, gon, na, make, you, cry, Never, gon, na, say, goodbye, Never, gon, na, tell, a, lie, and, hurt, you]
[We're, no, strangers, to, love, You, know, the, rules, and, so, do, I, A, full, commitment's, what, I'm, thinking, of, You, wouldn't, get, this, from, any, other, guy, I, just, wanna, tell, you, how, I'm,  , feeling, Gotta, make, you, understand, Never, gonna, give, you, up, Never, gonna, let, you, down, Never, gonna, run, around, and, desert, you, Never, gonna, make, you, cry, Never, gonna, say, goodbye, Never, gonna, tell, a, lie, and, hurt, you]


## Part-of-Speech (POS) Tagging

In [8]:
# Switch to the small English pipeline and run it on the original text
# (periods included)
nlp = spacy.load("en_core_web_sm")
doc = nlp(text)

# One line per token: the token text followed by its part-of-speech tag
for token in doc:
    print(token.text, token.pos_)

We PRON
're AUX
no DET
strangers NOUN
to PART
love VERB
. PUNCT
You PRON
know VERB
the DET
rules NOUN
and CCONJ
so ADV
do VERB
I. PROPN
A DET
full ADJ
commitment NOUN
's AUX
what PRON
I PRON
'm AUX
thinking VERB
of ADP
. PUNCT
You PRON
would AUX
n't PART
get VERB
this PRON
from ADP
any DET
other ADJ
guy NOUN
. PUNCT
I PRON
just ADV
wanna PROPN
tell VERB
you PRON
how SCONJ
I PRON
'm AUX
  SPACE
feeling VERB
. PUNCT
Got VERB
ta PART
make VERB
you PRON
understand VERB
. PUNCT
Never ADV
gon VERB
na PART
give VERB
you PRON
up ADP
. PUNCT
Never ADV
gon VERB
na PART
let VERB
you PRON
down ADP
. PUNCT
Never ADV
gon VERB
na PART
run VERB
around ADV
and CCONJ
desert VERB
you PRON
. PUNCT
Never ADV
gon VERB
na PART
make VERB
you PRON
cry VERB
. PUNCT
Never ADV
gon VERB
na PART
say VERB
goodbye NOUN
. PUNCT
Never ADV
gon VERB
na PART
tell VERB
a DET
lie NOUN
and CCONJ
hurt VERB
you PRON


### Identify only the verbs

In [9]:
# Keep only the tokens whose part-of-speech tag is VERB
verbs = [token.text for token in doc if token.pos_ == "VERB"]
print("Verbs:", verbs)

Verbs: ['love', 'know', 'do', 'thinking', 'get', 'tell', 'feeling', 'Got', 'make', 'understand', 'gon', 'give', 'gon', 'let', 'gon', 'run', 'desert', 'gon', 'make', 'cry', 'gon', 'say', 'gon', 'tell', 'hurt']


## Lemmatization

#### Lemmatization: Reducing the different inflected forms of a word to a single base form (its lemma)

In [10]:
# Run the small English pipeline on the sample text
nlp = spacy.load('en_core_web_sm')
doc = nlp(text)

# One line per token: the token text followed by its lemma
for token in doc:
    print(token.text, token.lemma_)

We we
're be
no no
strangers stranger
to to
love love
. .
You you
know know
the the
rules rule
and and
so so
do do
I. I.
A a
full full
commitment commitment
's be
what what
I I
'm be
thinking think
of of
. .
You you
would would
n't not
get get
this this
from from
any any
other other
guy guy
. .
I I
just just
wanna wanna
tell tell
you you
how how
I I
'm be
   
feeling feel
. .
Got got
ta to
make make
you you
understand understand
. .
Never never
gon go
na to
give give
you you
up up
. .
Never never
gon go
na to
let let
you you
down down
. .
Never never
gon go
na to
run run
around around
and and
desert desert
you you
. .
Never never
gon go
na to
make make
you you
cry cry
. .
Never never
gon go
na to
say say
goodbye goodbye
. .
Never never
gon go
na to
tell tell
a a
lie lie
and and
hurt hurt
you you


## Named Entity Recognition (NER)

In [11]:
# Sample passage for entity recognition (adjacent literals are concatenated).
# Note: this rebinds `text`, replacing the lyrics sample used in the sections above.
text = (
    "John, who is due for his first big break in a while, "
    "the former NFL running back has continued to impress all staff "
    "with his speed and quickness. "
    "He started off camp with two touchdown pass attempts in his rookie season, "
    "but ran into a number of injuries, "
    "including one that kept him from playing one game "
    "until the second game from Week 10 against Pittsburgh."
)

In [12]:
# Load the `en_core_web_lg` English pipeline under its own name (`lg`) for the
# named-entity example.
lg = spacy.load('en_core_web_lg')

In [13]:
# Run the pipeline on the NER sample text; the resulting Doc is rendered below
doc = lg(text)

In [14]:
# Visualize `doc` with displaCy's entity style ('ent')
displacy.render(doc, style = 'ent')