In [3]:
import spacy

In [4]:
nlp = spacy.load("en_core_web_sm")



In [5]:
def show_ents(doc):
    if doc.ents:
        for ent in doc.ents:
            print(ent.text+' - '+ent.label_+' - '+str(spacy.explain(ent.label_)))
    else:
        print('No named entities found.')

In [6]:
doc = nlp(u'May i go to washington, DC next May to see the Washington Manument?')

In [7]:
show_ents(doc)

washington - GPE - Countries, cities, states
DC - GPE - Countries, cities, states
next May - DATE - Absolute or relative dates or periods
the Washington Manument - ORG - Companies, agencies, institutions, etc.


In [8]:
doc = nlp(u'can i please borrow 500 dollars from you to buy some Microsoft stock?')

In [9]:
for ent in doc.ents:
    print(ent.text, ent.start, ent.end, ent.start_char, ent.end_char,
ent.label_)

500 dollars 4 6 20 31 MONEY
Microsoft 11 12 53 62 ORG


In [10]:
# Adding a Named entity to a span

In [11]:
doc = nlp(u'Tesla to build a U.K. factory for $6 million')

In [13]:
show_ents(doc)

U.K. - GPE - Countries, cities, states
$6 million - MONEY - Monetary values, including unit


In [14]:
# Right Now, spacy does not recognize "Tesla" as a company

In [15]:
from spacy.tokens import Span

In [27]:
ORG = doc.vocab.strings[u'ORG']

In [29]:
new_ent = Span(doc, 0, 1, label=ORG)

In [31]:
doc.ents = list(doc.ents)+[new_ent]

In [33]:
for ent in doc.ents:
    print(ent.text, ent.start, ent.end, ent.start_char, ent.end_char,
ent.label_)

Tesla 0 1 0 5 ORG
U.K. 4 5 17 21 GPE
$6 million 7 10 34 44 MONEY


In [35]:
for ent in doc.ents:
    print(ent.text, ent.label_)

Tesla ORG
U.K. GPE
$6 million MONEY


In [37]:
import spacy
from spacy import displacy

In [39]:
# Load a SpaCy model
nlp = spacy.load("en_core_web_sm")

In [40]:
# Process a sentence
doc = nlp("Tesla is revolutionizing the electric vehicle industry.")

In [41]:
# Visualize entities using displacy
displacy.render(doc, style="ent", jupyter=True)  # Use 'jupyter=True' for Jupyter/Colab

In [45]:
doc = nlp(u' Our company plans to introduce a new vacuum cleaner.'
          u'if succesful, the vacuum cleaner will be our first product.')

In [47]:
show_ents(doc)

first - ORDINAL - "first", "second", etc.


In [49]:
# import PhraseMatcher and create a matcher object:
from spacy.matcher import PhraseMatcher

In [51]:
matcher = PhraseMatcher(nlp.vocab)

In [53]:
phrase_list = ['vacuum cleaner', 'vacuum-cleaner']
phrase_patterns = [nlp(text) for text in phrase_list]

In [55]:
matcher.add('newproduct', None, *phrase_patterns)
matches = matcher(doc)
matches

[(2689272359382549672, 13, 15)]

In [57]:
# here we create spans from each match, and create named entities from them:
from spacy.tokens import span

In [64]:
from spacy.tokens import Span

PROD = doc.vocab.strings[u'PRODUCT']
new_event = [Span(doc, match[1], match[2], label=PROD) for match in matches]
doc.ents = list(doc.ents) + new_event
show_ents(doc)

vacuum cleaner - PRODUCT - Objects, vehicles, foods, etc. (not services)
first - ORDINAL - "first", "second", etc.


In [66]:
doc = nlp(u'Originally phriced at $29.50, the sweater was marked down to five dollars.')
show_ents(doc)

29.50 - MONEY - Monetary values, including unit
five dollars - MONEY - Monetary values, including unit


In [70]:
len([ent for ent in doc.ents if ent.label_=='MONEY'])

2

In [72]:
# problem with line Breaks

In [74]:
spacy.__version__
doc = nlp(u'Originally phriced at $29.50, the sweater was marked down to five dollars.')
show_ents(doc)

29.50 - MONEY - Monetary values, including unit
five dollars - MONEY - Monetary values, including unit


In [76]:
doc = nlp(u"Autonomous cars shift insurance liability toward manufacturers.")

In [82]:
for chunk in doc.noun_chunks:
    print(chunk.text+' - '+chunk.root.text+'-'+chunk.root.dep_+' - '+chunk.root.head.text)

Autonomous cars - cars-nsubj - shift
insurance liability - liability-dobj - shift
manufacturers - manufacturers-pobj - toward


In [84]:
# Visualizing Named Entities

In [86]:
import spacy

In [92]:
nlp = spacy.load("en_core_web_sm")

In [90]:
from spacy import displacy

In [108]:
doc =  nlp (u' Over the last quarter Apple sold nearly 20 thousand iPods for a profit of $6 million.'
            u'By contrast, sony sold only 7 thousand Walkman music players.')

In [106]:
displacy.render(doc,style='ent', jupyter=True)

# Sentence segmentation

In [111]:
import spacy

In [113]:
nlp= spacy.load("en_core_web_sm")

In [117]:
doc = nlp(u'This is the first sentence. This is another sentence. The is the last sentence.')

In [119]:
for sent in doc.sents:
    print(sent)

This is the first sentence.
This is another sentence.
The is the last sentence.


In [121]:
print(doc[1])

is


In [123]:
print(doc.sents[1])

TypeError: 'generator' object is not subscriptable

In [125]:
doc_sents=[sent for sent in doc.sents]
doc_sents

[This is the first sentence.,
 This is another sentence.,
 The is the last sentence.]