In [1]:
# Anaconda admin "python -m spacy download en"

In [7]:
# Import spacy
import spacy

# Create an instance of spacy and call it "nlp"
nlp = spacy.load("en_core_web_sm") # Loading english language shortest model

In [8]:
sentence = '"Mr. O\'Neill thinks that the boys\' stories about Chile\'s capital aren\'t amusing."'

In [9]:
print(sentence)

"Mr. O'Neill thinks that the boys' stories about Chile's capital aren't amusing."


In [10]:
# Creating an object of our instance
doc_object = nlp(sentence)

In [11]:
# Separating all the tokens from our instance
for token in doc_object:
    print(token.text)

"
Mr.
O'Neill
thinks
that
the
boys
'
stories
about
Chile
's
capital
are
n't
amusing
.
"


In [12]:
# Breaking the sentence according to Parts of Speech(POS)
for token in doc_object:
    print(token.text, token.pos_, end=" | ")

" PUNCT | Mr. PROPN | O'Neill PROPN | thinks VERB | that ADP | the DET | boys NOUN | ' PART | stories NOUN | about ADP | Chile PROPN | 's PART | capital NOUN | are VERB | n't ADV | amusing ADJ | . PUNCT | " PUNCT | 

In [13]:
sentence = "It is best to access our website from 9 a.m. to 1 p.m. every weekend. The address is www.mywebsite.ie."

In [14]:
# Create a doc_object
doc_object = nlp(sentence)
# Show tokens from the doc_object
for token in doc_object:
    print(token.text)

It
is
best
to
access
our
website
from
9
a.m.
to
1
p.m.
every
weekend
.
The
address
is
www.mywebsite.ie
.


In [15]:
sentence = "I live about 20km from here. Taxi will cost about £50."

In [16]:
doc_object = nlp(sentence)
for token in doc_object:
    print(token)

I
live
about
20
km
from
here
.
Taxi
will
cost
about
£
50
.


In [17]:
len(doc_object)

15

In [18]:
# Sparse matrix 
len(doc_object.vocab)

57853

In [19]:
doc_object = nlp("I really like working with words!")

# Print each token
for token in doc_object:
    print(token)

I
really
like
working
with
words
!


In [20]:
# Extract the first token
doc_object[0]

I

In [21]:
# Extract the tokens from 3 to 6
doc_object[2:5]

like working with

In [22]:
# Extracting last 2 tokens
doc_object[-2:]

words!

In [23]:
# NER
doc_object = nlp("Samsung in Ireland are pleased with their new folding screen that they released after a $9 million investment.")



In [24]:
for token in doc_object:
    print(token, end=" | ")

Samsung | in | Ireland | are | pleased | with | their | new | folding | screen | that | they | released | after | a | $ | 9 | million | investment | . | 

In [29]:
# Show Named Entity Recognition(NER) of doc_object
for entity in doc_object.ents:
    print(entity, entity.label_, spacy.explain(entity.label_))

Samsung ORG Companies, agencies, institutions, etc.
Ireland GPE Countries, cities, states
$9 million MONEY Monetary values, including unit


In [32]:
doc_object = nlp("I like my Nissan car in the U.K.")
for entity in doc_object.ents:
    print(entity, entity.label_)

Nissan ORG
U.K. GPE


In [63]:
# Create a function to display entity from a doc_object.
# Show entity text, label and label explanation
# Test the function with the text "I like my LYIT"
def show_entity_info(doc_object):
        for entity in doc_object.ents:
            print(entity, entity.label_, spacy.explain(entity.label_))

In [64]:
# Function calling
sentence = nlp("I like my LYIT")
show_entity_info(sentence)

LYIT ORG Companies, agencies, institutions, etc.


In [75]:
# Create a function to display entity from a doc_object.
# Show entity text, label and label explanation
# Test the function with the text "I like my LYIT"

def show_entity_info2(doc_object):
    if doc_object:
        for entity in doc_object.ents:
            print(entity, entity.label_, spacy.explain(entity.label_))
    
        else:
            print("There are no entities")



    

In [76]:
 
sentence = nlp("I like my car")
show_entity_info2(sentence)

There are no entities


In [124]:
# With 20 spaces in entities
# Create a function to display entity from a doc_object.
# Show entity text, label and label explanation
# Test the function with the text "I like my LYIT"

def show_entity_info3(doc_object):
    if doc_object:
        for entity in doc_object.ents:
            print(f"{entity.text: {20}} {entity.label_:{10}} {spacy.explain(entity.label_):{20}}")
    
        else:
            print("There are no entities")


In [125]:
sentence = nlp("I like my LYIT")
show_entity_info3(sentence)

ValueError: Sign not allowed in string format specifier

In [90]:
doc_object = nlp("Autonomous cars shift insurance liability towards manufacturers")

In [96]:
for chunk in doc_object.noun_chunks:
    print(chunk.text, chunk.root.text, spacy.explain(chunk.root.dep_), chunk.root.head.text)

Autonomous cars cars nominal subject shift
insurance liability liability direct object shift
manufacturers manufacturers object of preposition towards


In [98]:
# Use displacy to show NER, noun chunks, etc
from spacy import displacy

In [105]:
# Command to render the sentence
displacy.render(doc_object, style="dep", jupyter=True, options={"distance":140, "color":"Red"})

In [121]:
# Load the text file "noun-chunks.txt"

my_text_file = open("noun-chunks.txt")
my_text = my_text_file.read()

# Show entities and labels 
doc_object = nlp(my_text)

for entity in doc_object.ents:
    print(entity, entity.label_)
    
# Display entities using "style = ent"
displacy.render(doc_object, style="ent", jupyter=True)


Sebastian Thrun PERSON
Google ORG
2007 DATE

 GPE
American NORP
Thrun PERSON
Recode ORG
earlier this week DATE

 GPE
less than a decade later DATE
dozens CARDINAL
