In [1]:
# Import spaCy and load the language pipeline used throughout this notebook.
import spacy
# `nlp` is the callable that later cells apply to raw text to obtain a Doc.
# NOTE(review): 'en_core_web_sm' has to be installed in the kernel's
# environment for this load to succeed — confirm before running.
nlp = spacy.load('en_core_web_sm')

In [3]:
# Create a string that includes opening and closing quotation marks.
# The literal is single-quoted, so the double quotes are ordinary characters
# and only the apostrophe in "We're" needs a backslash escape.
mystring = '"We\'re moving to L.A.!"'
print(mystring)

"We're moving to L.A.!"


In [5]:
# Run the pipeline over the sample string, then write every token's text on
# a single output line, each one followed by ' | '.
doc = nlp(mystring)

for token in doc:
    # The separator is appended to the text itself, so print adds nothing.
    print(token.text + ' | ', end='')

" | We | 're | moving | to | L.A. | ! | " | 

# Prefixes, Suffixes and Infixes

In [9]:
# Tokenize a sentence that mixes a contraction, a hyphenated word, an e-mail
# address and a URL, then list the resulting tokens one per line.
doc2 = nlp(u"We're here to help! Send snail-mail, email support@oursite.com or visit us at http://www.tmart.co.ke!")

for t in doc2:
    # Printing a token shows its text; ask for the text explicitly.
    print(t.text)

We
're
here
to
help
!
Send
snail
-
mail
,
email
support@oursite.com
or
visit
us
at
http://www.tmart.co.ke
!


In [11]:
# Build a Doc from a sentence containing a currency amount and a URL;
# later cells print its tokens, measure it and render it.
doc3 = nlp(u"Zindi Can Award you some $1000.39 to compete visit their website on https://zindi.africa/competitions")

In [12]:
# List the tokens of doc3, one per line.
for t in doc3:
    print(t.text)

Zindi
Can
Award
you
some
$
1000.39
to
compete
visit
their
website
on
https://zindi.africa/competitions


In [13]:
# Count the tokens in doc3 (the length of a Doc is its number of tokens).
len(doc3)

14

In [14]:
# Look at the vocabulary object attached to doc3. This cell only displays
# the object's repr; it does not count anything.
doc3.vocab

<spacy.vocab.Vocab at 0x203780e6948>

In [15]:
# Count the entries held by doc3's vocabulary.
len(doc3.vocab)

790

# Named Entities


In [21]:
# Build a Doc whose text mentions an organisation-like name, a place and a
# money amount; the following cells inspect its tokens and entities.
doc4 = nlp(u"ZindiInc set for competitions in Africa for $6 million")

In [22]:
# Write doc4's tokens on one line, each followed by '--'.
for token in doc4:
    print(token.text + '--', end='')

ZindiInc--set--for--competitions--in--Africa--for--$--6--million--

In [24]:
# Inspect the named entities found in doc4. For each entity, emit its text,
# its label and spaCy's explanation of that label on separate lines,
# followed by two blank lines.
for ent in doc4.ents:
    # One call with sep='\n': the trailing '\n' argument plus print's own
    # line ending produce the two blank lines between entities.
    print(ent, ent.label_, spacy.explain(ent.label_), '\n', sep='\n')

ZindiInc
ORG
Companies, agencies, institutions, etc.


Africa
LOC
Non-GPE locations, mountain ranges, bodies of water


$6 million
MONEY
Monetary values, including unit




# Noun chunks

In [25]:
# Noun chunks (another property of the Doc object) are base noun phrases.


In [26]:
# Build a Doc from a sentence with several noun phrases for the noun-chunk
# demonstration in the next cell.
doc5 = nlp(u"Autonomous cars shift insurance liability to car manufactures haha")

In [27]:
# List the noun chunks of doc5, one per line, using each chunk's text.
for chunk in doc5.noun_chunks:
    print(chunk.text)

Autonomous cars
insurance liability
car manufactures


In [28]:
# Noun chunks on a sentence with a long run of comma-separated modifiers.
doc6 = nlp(u"He was a one-eyed, one-horned, flying, purple people-eater.")

# Print the text of every noun chunk, one per line.
for chunk in doc6.noun_chunks:
    print(chunk.text)

He
a one-eyed, one-horned, flying, purple people-eater


# spaCy Tokenization Visualizers

In [29]:
from spacy import displacy

In [31]:
# Render doc3 inline in the notebook with displaCy's 'dep' style.
# NOTE(review): 'distance' is handed to displaCy as a rendering option —
# presumably the spacing between words; confirm against the displaCy docs.
displacy.render(
    doc3,
    style='dep',
    jupyter=True,
    options={'distance': 110},
)

In [37]:
# Visualize the entity recognizer: build a Doc and render it inline with
# displaCy's 'ent' style.
# NOTE(review): this rebinds `doc`, which an earlier cell created from
# `mystring`; anything run afterwards sees this Doc instead.
doc = nlp(
    u"Over Last Quater ZindiInc Awarded nearly 20 thousand hoodies and rewarded over $7 million "
)
displacy.render(
    doc,
    style='ent',
    jupyter=True,
)