In [1]:
# Anaconda admin "python -m spacy download en"

In [7]:
# Import spacy
import spacy

# Create an instance of spacy and call it "nlp"
nlp = spacy.load("en_core_web_sm") # Loading english language shortest model

In [8]:
sentence = '"Mr. O\'Neill thinks that the boys\' stories about Chile\'s capital aren\'t amusing."'

In [9]:
print(sentence)

"Mr. O'Neill thinks that the boys' stories about Chile's capital aren't amusing."


In [10]:
# Creating an object of our instance
doc_object = nlp(sentence)

In [11]:
# Separating all the tokens from our instance
for token in doc_object:
    print(token.text)

"
Mr.
O'Neill
thinks
that
the
boys
'
stories
about
Chile
's
capital
are
n't
amusing
.
"


In [12]:
# Breaking the sentence according to Parts of Speech(POS)
for token in doc_object:
    print(token.text, token.pos_, end=" | ")

" PUNCT | Mr. PROPN | O'Neill PROPN | thinks VERB | that ADP | the DET | boys NOUN | ' PART | stories NOUN | about ADP | Chile PROPN | 's PART | capital NOUN | are VERB | n't ADV | amusing ADJ | . PUNCT | " PUNCT | 

In [13]:
sentence = "It is best to access our website from 9 a.m. to 1 p.m. every weekend. The address is www.mywebsite.ie."

In [14]:
# Create a doc_object
doc_object = nlp(sentence)
# Show tokens from the doc_object
for token in doc_object:
    print(token.text)

It
is
best
to
access
our
website
from
9
a.m.
to
1
p.m.
every
weekend
.
The
address
is
www.mywebsite.ie
.


In [15]:
sentence = "I live about 20km from here. Taxi will cost about £50."

In [16]:
doc_object = nlp(sentence)
for token in doc_object:
    print(token)

I
live
about
20
km
from
here
.
Taxi
will
cost
about
£
50
.


In [17]:
len(doc_object)

15

In [18]:
# Sparse matrix 
len(doc_object.vocab)

57853

In [19]:
doc_object = nlp("I really like working with words!")

# Print each token
for token in doc_object:
    print(token)

I
really
like
working
with
words
!


In [20]:
# Extract the first token
doc_object[0]

I

In [21]:
# Extract the tokens from 3 to 6
doc_object[2:5]

like working with

In [22]:
# Extracting last 2 tokens
doc_object[-2:]

words!

In [23]:
# NER
doc_object = nlp("Samsung in Ireland are pleased with their new folding screen that they released after a $9 million investment.")



In [24]:
for token in doc_object:
    print(token, end=" | ")

Samsung | in | Ireland | are | pleased | with | their | new | folding | screen | that | they | released | after | a | $ | 9 | million | investment | . | 

In [26]:
# Show NER of doc_object
for entity in doc_object.ents:
    print(entity, entity.label, spacy.explain(entity.label_))

Samsung 381 Companies, agencies, institutions, etc.
Ireland 382 Countries, cities, states
$9 million 391 Monetary values, including unit
