<a href="https://colab.research.google.com/github/abhi1994-tim/NLP_Projects/blob/main/NLP_Tutorial_Named_Entity_Recognition_(NER).ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

#NLP Tutorial: Named Entity Recognition (NER)

In [1]:
import spacy

In [3]:
nlp = spacy.load("en_core_web_sm")

In [4]:
nlp.pipe_names

['tok2vec', 'tagger', 'parser', 'attribute_ruler', 'lemmatizer', 'ner']

In [5]:
doc = nlp("Tesla Inc is going to acquire twitter for $45 billion")

In [7]:
for ent in doc.ents:
    print(ent.text)

Tesla Inc
$45 billion


In [8]:
for ent in doc.ents:
    print(ent.text, " | ", ent.label_)

Tesla Inc  |  ORG
$45 billion  |  MONEY


In [9]:
for ent in doc.ents:
    print(ent.text, " | ", ent.label_, "|", spacy.explain(ent.label_))

Tesla Inc  |  ORG | Companies, agencies, institutions, etc.
$45 billion  |  MONEY | Monetary values, including unit


In [10]:
from spacy import displacy

displacy.render(doc, style="ent")

In [14]:
nlp.pipe_labels["ner"]

['CARDINAL',
 'DATE',
 'EVENT',
 'FAC',
 'GPE',
 'LANGUAGE',
 'LAW',
 'LOC',
 'MONEY',
 'NORP',
 'ORDINAL',
 'ORG',
 'PERCENT',
 'PERSON',
 'PRODUCT',
 'QUANTITY',
 'TIME',
 'WORK_OF_ART']

In [15]:
doc = nlp("Michael Bloomberg founded Bloomberg in 1982")
for ent in doc.ents:
    print(ent.text, "|", ent.label_, "|", spacy.explain(ent.label_))

Michael Bloomberg | PERSON | People, including fictional
Bloomberg | PERSON | People, including fictional
1982 | DATE | Absolute or relative dates or periods


In [16]:
doc = nlp("Michael Bloomberg founded Bloomberg  inc in 1982")
for ent in doc.ents:
    print(ent.text, "|", ent.label_, "|", spacy.explain(ent.label_))

Michael Bloomberg | PERSON | People, including fictional
Bloomberg  inc | ORG | Companies, agencies, institutions, etc.
1982 | DATE | Absolute or relative dates or periods


In [17]:
doc = nlp("Abhijeet Gupta is learning Data science from Feb 2023 to the till December 2024 and he is looking for a job in any Role")
for ent in doc.ents:
    print(ent.text, "|", ent.label_, "|", spacy.explain(ent.label_))

Abhijeet Gupta | PERSON | People, including fictional
Feb 2023 | DATE | Absolute or relative dates or periods
December 2024 | DATE | Absolute or relative dates or periods


In [18]:
# Also There is an Hugging face from there we can use many NER problems and models as per we need

In [19]:
doc = nlp("Tesla Inc is going to acquire Twitter Inc for $45 billion")
for ent in doc.ents:
    print(ent.text, " | ", ent.label_, " | ", ent.start_char, "|", ent.end_char)

Tesla Inc  |  ORG  |  0 | 9
Twitter Inc  |  ORG  |  30 | 41
$45 billion  |  MONEY  |  46 | 57


In [20]:
doc[0]

Tesla

In [21]:
type(doc[0])

spacy.tokens.token.Token

In [23]:
doc[3:6]

going to acquire

In [24]:
type(doc[3:6])

spacy.tokens.span.Span

In [25]:
from spacy.tokens import Span

s1 = Span(doc, 0, 1, label="ORG")
s2 = Span(doc, 6, 7, label="ORG")

doc.set_ents([s1, s2], default="unmodified") # this code here is alter the enitity if the model is not able to search on right place.
# we can modify the entity as we like, from the above code

In [26]:
doc = nlp("Tesla Inc is going to acquire Twitter Inc for $45 billion")
for ent in doc.ents:
    print(ent.text, " | ", ent.label_, " | ", ent.start_char, "|", ent.end_char)

Tesla Inc  |  ORG  |  0 | 9
Twitter Inc  |  ORG  |  30 | 41
$45 billion  |  MONEY  |  46 | 57
