## Failure of Pre-built NER model

In [7]:
# Import the packaged small English model (vocabulary, syntax & entities)
import en_core_web_sm

# Load the pre-built English pipeline
nlp = en_core_web_sm.load()

# Calling the "nlp" object on a text returns a document with linguistic annotations
doc = nlp('what is the price of mcspicy chicken? I live in Indore')

# Pair every entity span the model recognised with its label
entities = []
for span in doc.ents:
    entities.append((span, span.label_))

print(entities)

[(Indore, 'GPE')]


## Train Custom NER model

In [8]:
# import the required libraries
import spacy
import random

In [9]:
# Initial parameters for the custom NER training run
# model_file: name/path of an existing model to load with spacy.load();
#             leave it as None to start from a blank English pipeline.
model_file = None
# iterations: number of passes made over the training data
iterations = 20

In [10]:
# Training data: every sample asks for the price of one product, and the
# product name is the single 'FoodProduct' entity, given as (start, end)
# character offsets into the sentence.
_QUESTION_PREFIX = 'what is the price of '
_PRODUCT_NAMES = ['McVeggie', 'McEgg', 'McChicken', 'McSpicy Paneer', 'McSpicy Chicken']

TRAINING_DATA = [
    (
        _QUESTION_PREFIX + product + '?',
        {'entities': [(len(_QUESTION_PREFIX),
                       len(_QUESTION_PREFIX) + len(product),
                       'FoodProduct')]},
    )
    for product in _PRODUCT_NAMES
]

In [11]:
# Sentence with an unseen product name, used to try out the trained model
test_sample = 'what is the price of McAloo?'

# Build the NLP object: start from a blank English pipeline unless an
# existing model was configured in model_file
if model_file is None:
    nlp = spacy.blank('en')
    print("Created blank NLP model")
else:
    nlp = spacy.load(model_file)
    print("Load Existing NER Model ", model_file)

Created blank NLP model


In [14]:
# Create NLP Pipeline
# Bind the NER component to ner_pipe in BOTH branches. add_pipe() returns the
# component it creates, so ner_pipe is defined even when the pipeline starts
# out blank (previously it was only set when 'ner' already existed, which left
# ner_pipe undefined on a fresh kernel).
if 'ner' not in nlp.pipe_names:
    ner_pipe = nlp.add_pipe('ner')
else:
    ner_pipe = nlp.get_pipe('ner')

In [16]:
# Show each training sentence followed by its annotation dict
for text, annotations in TRAINING_DATA:
    print(text, annotations, sep='\n')

what is the price of McVeggie?
{'entities': [(21, 29, 'FoodProduct')]}
what is the price of McEgg?
{'entities': [(21, 26, 'FoodProduct')]}
what is the price of McChicken?
{'entities': [(21, 30, 'FoodProduct')]}
what is the price of McSpicy Paneer?
{'entities': [(21, 35, 'FoodProduct')]}
what is the price of McSpicy Chicken?
{'entities': [(21, 36, 'FoodProduct')]}


In [17]:
# Register every entity label that appears in the annotations with the NER pipe
for sample in TRAINING_DATA:
    spans = sample[1].get('entities')
    for span in spans:
        # span is (start, end, label); only the label is registered
        ner_pipe.add_label(span[2])

In [24]:
from spacy.training.example import Example

# Names of every pipe except 'ner', so they can be disabled during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']

# Train the NER model only.
# select_pipes(disable=...) is the spaCy v3 replacement for the deprecated disable_pipes().
with nlp.select_pipes(disable=other_pipes):
    # A blank pipeline needs its weights initialised; a model loaded from
    # model_file must keep its trained weights, so training is resumed instead.
    # initialize() is the v3 name of the deprecated begin_training().
    if model_file is None:
        optimizer = nlp.initialize()
    else:
        optimizer = nlp.resume_training()

    for itn in range(iterations):
        print("Iteration Number:" + str(itn))
        # Shuffle a copy so the TRAINING_DATA list itself is not reordered in place
        epoch_samples = list(TRAINING_DATA)
        random.shuffle(epoch_samples)
        losses = {}
        for text, annotations in epoch_samples:
            # Build one Example from the raw text and its annotations
            example = Example.from_dict(nlp.make_doc(text), annotations)
            nlp.update([example],
                       drop=0.2,  # dropout - make it harder to memorise data
                       sgd=optimizer,  # callable to update weights
                       losses=losses)
        print("Loss:", losses['ner'])

Iteration Number:0
Loss: 27.67336332798004
Iteration Number:1
Loss: 14.65741965174675
Iteration Number:2
Loss: 6.1494375862203015
Iteration Number:3
Loss: 3.840953488090271
Iteration Number:4
Loss: 2.7248798537986763
Iteration Number:5
Loss: 2.205629466531748
Iteration Number:6
Loss: 5.353648221464597
Iteration Number:7
Loss: 1.589421958231739
Iteration Number:8
Loss: 3.2146930639546265
Iteration Number:9
Loss: 2.4547040706671233
Iteration Number:10
Loss: 0.29687204025395736
Iteration Number:11
Loss: 0.08804141546645604
Iteration Number:12
Loss: 0.005363100676044991
Iteration Number:13
Loss: 0.00011563083046032588
Iteration Number:14
Loss: 1.5327865883590205e-05
Iteration Number:15
Loss: 5.2827830455521246e-05
Iteration Number:16
Loss: 0.00030795995479221237
Iteration Number:17
Loss: 6.2232355050970254e-06
Iteration Number:18
Loss: 1.0672198615425215e-05
Iteration Number:19
Loss: 1.2493638922025262e-06


In [25]:
# Save the trained pipeline to disk
model_file = "ner_model"
nlp.to_disk(model_file)

# Test the model on the sample sentence and list the entities it finds
test_document = nlp(test_sample)
for entity in test_document.ents:
    print(entity.text, entity.start_char, entity.end_char, entity.label_)

McAloo 21 27 FoodProduct


## Experiments

In [33]:
# Example annotation: one sentence with two labelled (start, end, label) spans
(
    'book a ticket indore to delhi',
    {'entities': [(14, 20, 'Source'), (24, 29, 'Destination')]},
)

('book a ticket indore to delhi',
 {'entities': [(14, 20, 'Source'), (24, 29, 'Destination')]})

In [34]:
# (start, end) character offsets of 'delhi' inside the booking sentence
booking_text, city = 'book a ticket indore to delhi', 'delhi'
city_start = booking_text.index(city)
(city_start, city_start + len(city))

(24, 29)

In [36]:
# Sample news text; the cells below look up character offsets of phrases in it,
# so the literal must stay exactly as written.
news="""The Supreme Court on Sunday issued a slew of directions to the Central and state governments on the COVID-19 situation and directed that no patient shall be denied hospitalisation or essential drugs in any State or Union Territory for lack of local residential or identity proof.
Bench headed by Justice DY Chandrachud directed the Central government to formulate a national policy on admissions to hospitals, within two weeks, which shall be followed by all state governments and till then no patients will be denied admission or essential drugs in absence of local residential or identity proof."""

In [37]:
# (start, end) character offsets of the first 'Supreme Court' mention in news
court_start = news.index('Supreme Court')
(court_start, court_start + len('Supreme Court'))

(4, 17)

In [38]:
# Print each phrase with the (start, end) offsets of its first occurrence in news
phrases = ['Supreme Court', 'Justice DY Chandrachud', 'Central government']
for phrase in phrases:
    begin = news.index(phrase)
    print(phrase, begin, begin + len(phrase))

Supreme Court 4 17
Justice DY Chandrachud 296 318
Central government 332 350


In [None]:
# Note: NER ===> classification (entity recognition can be framed as a classification task)

In [None]:
# Features (X1, X2)  ->  target (y)
# [start, end]       ->  entity_type
#
# 21, 26  ->  source
# 21, 24  ->  destination
# ....

## Custom NER model 2

In [26]:
import spacy
import random


# NER training data. Each sentence is listed with the phrase to tag as
# "action" and the phrase to tag as "amount"; the (start, end) character
# offsets are taken from the first occurrence of each phrase in its sentence.
_LABELLED_SENTENCES = [
    ("I have deposited an amount of $500 using my debit card.", "deposited", "$500"),
    ("Send $500 to the merchant with account number 1234567890. ", "Send", "$500"),
    ("Transfer $20000 to my new bank account ending with the number 4567. ", "Transfer", "$20000"),
    ("Please deposit $2000 in my account. ", "deposit", "$2000"),
    ("I would like to withdraw $10000 from my bank account. ", "withdraw", "$10000"),
]

TRAIN_DATA = [
    (
        sentence,
        {"entities": [
            (sentence.index(action), sentence.index(action) + len(action), "action"),
            (sentence.index(amount), sentence.index(amount) + len(amount), "amount"),
        ]},
    )
    for sentence, action, amount in _LABELLED_SENTENCES
]

In [28]:
# Create blank NLP model
nlp = spacy.blank('en')

# Create the NER pipe and bind it to ner_pipe, so the label-registration cell
# below works on THIS pipeline's component rather than on a ner_pipe left over
# from the earlier model (or on an undefined name after a kernel restart).
ner_pipe = nlp.add_pipe('ner')

# Keep the component as the cell's last expression so it is still displayed
ner_pipe

<spacy.pipeline.ner.EntityRecognizer at 0x1d96962f940>

In [29]:
# Register every entity label that appears in TRAIN_DATA with the NER pipe
for sample in TRAIN_DATA:
    spans = sample[1].get('entities')
    for span in spans:
        # span is (start, end, label); only the label is registered
        ner_pipe.add_label(span[2])

In [30]:
# Initialise the blank pipeline's weights and get an optimizer.
# initialize() is the spaCy v3 name of the deprecated begin_training().
optimizer = nlp.initialize()

for itn in range(10):
    # Shuffle a copy so the TRAIN_DATA list itself is not reordered in place
    epoch_samples = list(TRAIN_DATA)
    random.shuffle(epoch_samples)
    losses = {}
    for text, annotations in epoch_samples:
        # Build one Example from the raw text and its annotations
        example = Example.from_dict(nlp.make_doc(text), annotations)
        nlp.update([example],
                   drop=0.2,  # dropout - make it harder to memorise data
                   sgd=optimizer,  # callable to update weights
                   losses=losses)
    print("Loss:", losses['ner'])

Loss: 49.11087340116501
Loss: 35.62243336439133
Loss: 16.901780327782035
Loss: 12.226527539416566
Loss: 34.31252641390529
Loss: 9.412501119382796
Loss: 5.754572443964207
Loss: 2.3702553956709096
Loss: 0.2659346944671661
Loss: 0.13192886341598045


In [31]:
# Save the custom NER model to a directory on disk
save_dir = "custom_ner_model"
nlp.to_disk(save_dir)
print("Model saved")

Model saved


In [32]:
# Reload the saved pipeline from disk and try it on a new sentence
nlp2 = spacy.load("custom_ner_model")
doc2 = nlp2("I have withdrawn an amount of $300 with my credit card.")
for found in doc2.ents:
    label, phrase = found.label_, found.text
    print(label, phrase)

action withdrawn
amount $300
