### Tokenization and Visualization using Spacy

In [1]:
import spacy

In [2]:
#### Load English Library en

In [3]:
nlp = spacy.load('en')

In [4]:
nlp.vocab.length

478

In [5]:
#### Create a doc object

In [6]:
doc = nlp('hello can you help me ?')

In [7]:
for token in doc:
    print("{}:{}".format(token, token.vector[:3]))

hello:[-1.6857275  -0.28448594  0.04885194]
can:[-1.4881763  0.9842916 -2.8467884]
you:[-0.4530028  3.304438  -3.0248477]
help:[-0.5302762  2.4663606  0.4537729]
me:[ 2.1858847   0.5189028  -0.45685858]
?:[-2.4404542  -0.59511113  1.0436571 ]


In [8]:
[(token.text, token.pos_) for token in doc]

[('hello', 'INTJ'),
 ('can', 'VERB'),
 ('you', 'PRON'),
 ('help', 'VERB'),
 ('me', 'PRON'),
 ('?', 'PUNCT')]

In [9]:
#### Linguistic Features in Spacy

In [10]:
#Tokenization
doc = nlp("Apple is looking at buying U.K. startup for $1 billion")
for token in doc:
    print(token.text)

Apple
is
looking
at
buying
U.K.
startup
for
$
1
billion


In [11]:
#Adding special case tokenization rules
from spacy.symbols import ORTH
doc = nlp("gimme that pen.")
print([w.text for w in doc])

#Add a special case
special_case = [{ORTH:'gim'}, {ORTH:'me'}]
print(special_case)

#tokenize  the special case
nlp.tokenizer.add_special_case("gimme", special_case)

#check new tokenization with special case
print([w.text for w in doc])

['gimme', 'that', 'pen', '.']
[{65: 'gim'}, {65: 'me'}]
['gimme', 'that', 'pen', '.']


#### Visualizations in Spacy using DisplaCy

In [12]:
import spacy
from spacy import displacy

In [13]:
doc = nlp("This is a sentence.")

In [14]:
#displacy.serve(doc, style="dep")
displacy.render(doc, style="dep")

In [15]:
text = "When Sebastian Thrun started working on self-driving cars at Google in 2007, few people outside of the company took him seriously."
doc = nlp(text)

In [16]:
displacy.render(doc, style='ent')

In [17]:
#Named Entity Visualizer options
colors = {"ORG": "linear-gradient(90deg, #aa9cfc, #fc9ce7)"}
options = {"ents": ["ORG"], "colors": colors}
#displacy.serve(doc, style="ent", options=options)
displacy.render(doc, style="ent", options=options)

In [18]:
#Named Entity Visualizer options
options = {"ents": ["PERSON", "ORG", "PRODUCT"],
           "colors": {"ORG": "yellow"}}
#displacy.serve(doc, style="ent", options=options)
displacy.render(doc, style="ent", options=options)

In [19]:
#Dependency Visualizer options
options = {"compact": True, "color": "blue"}
#displacy.serve(doc, style="dep", options=options)
displacy.render(doc, style="dep", options=options)

In [20]:
displacy.Span

spacy.tokens.span.Span