## Failure of Pre-built NER model

In [22]:
# importing the model en_core_web_sm of English for vocabluary, syntax & entities
import en_core_web_sm   

# load en_core_web_sm of English for vocabluary, syntax & entities
nlp = en_core_web_sm.load() 

#  "nlp" Object is used to create documents with linguistic annotations.
doc = nlp('what is the price of mcspicy chicken?')

entities=[(i, i.label_) for i in doc.ents]

print(entities)

[]


## Train Custom NER model

In [7]:
# import the required libraries
import spacy
import random

In [17]:
# Initial parameters
model_file = "ner_model" # set esisting model name other wise set it to None
iterations = 20

In [18]:
# Training data
TRAINING_DATA = [('what is the price of McVeggie?', {'entities': [(21, 29, 'FoodProduct')]}), 
                 ('what is the price of McEgg?', {'entities': [(21, 26, 'FoodProduct')]}), 
                 ('what is the price of McChicken?', {'entities': [(21, 30, 'FoodProduct')]}), 
                 ('what is the price of McSpicy Paneer?', {'entities': [(21, 35, 'FoodProduct')]}), 
                 ('what is the price of McSpicy Chicken?', {'entities': [(21, 36, 'FoodProduct')]}),] 

In [19]:
# Testing sample data       
test_sample='what is the price of McAloo?'

# Create NLP model
if model_file is not None:
    nlp = spacy.load(model_file)  
    print("Load Existing NER Model ", model_file)
else:
    nlp = spacy.blank('en')  
    print("Created blank NLP model")

Load Existing NER Model  ner_model


In [13]:
# Create NLP Pipeline
if 'ner' not in nlp.pipe_names: 
    ner_pipe = nlp.create_pipe('ner')
    nlp.add_pipe(ner_pipe, last=True)
else:
    ner_pipe = nlp.get_pipe('ner')

In [14]:
# Add entities labels to the ner pipeline
for text, annotations in TRAINING_DATA:
    for entity in annotations.get('entities'):
        ner_pipe.add_label(entity[2])

In [15]:
# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']# train NER Model

with nlp.disable_pipes(*other_pipes):  # only train NER
    optimizer = nlp.begin_training()
    for itn in range(iterations):
        print("Iteration Number:" + str(itn))
        random.shuffle(TRAINING_DATA)
        losses = {}
        for text, annotations in TRAINING_DATA:
            nlp.update(
                [text],  # batch of texts
                [annotations],  # batch of annotations
                drop=0.2,# dropout - make it harder to memorise data
                sgd=optimizer,  # callable to update weights
                losses=losses)
        print("Loss:",losses['ner'])

Iteration Number:0
Loss: 24.96239483356476
Iteration Number:1
Loss: 9.831780625216197
Iteration Number:2
Loss: 7.696350081167111
Iteration Number:3
Loss: 4.386721943319117
Iteration Number:4
Loss: 2.548774863766198
Iteration Number:5
Loss: 1.7411677321941923
Iteration Number:6
Loss: 5.561346650243195
Iteration Number:7
Loss: 3.140873395567486
Iteration Number:8
Loss: 3.9140133520781397
Iteration Number:9
Loss: 2.7504411090849836
Iteration Number:10
Loss: 4.473644116555793
Iteration Number:11
Loss: 3.4161329613894127
Iteration Number:12
Loss: 1.4314458954928575
Iteration Number:13
Loss: 0.5273229079712749
Iteration Number:14
Loss: 0.008317893163395046
Iteration Number:15
Loss: 0.0001270340421934693
Iteration Number:16
Loss: 2.789346120694367e-05
Iteration Number:17
Loss: 4.012501949276052e-06
Iteration Number:18
Loss: 1.79791904994263e-06
Iteration Number:19
Loss: 1.0227177624132128e-06


In [16]:
# save model
model_file ="ner_model"
nlp.to_disk(model_file)# test model


test_document = nlp(test_sample)
for ent in test_document.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

McAloo 21 27 FoodProduct


## Experiments

In [None]:
('book a ticket indore to delhi', {'entities': [(14, 20, 'Source'),(24, 29,'Destination')]})

In [27]:
'book a ticket indore to delhi'.index('delhi'), 'book a ticket indore to delhi'.index('delhi')+len('delhi')

(24, 29)

In [29]:
news="""The Supreme Court on Sunday issued a slew of directions to the Central and state governments on the COVID-19 situation and directed that no patient shall be denied hospitalisation or essential drugs in any State or Union Territory for lack of local residential or identity proof.
Bench headed by Justice DY Chandrachud directed the Central government to formulate a national policy on admissions to hospitals, within two weeks, which shall be followed by all state governments and till then no patients will be denied admission or essential drugs in absence of local residential or identity proof."""

In [31]:
news.index('Supreme Court'),news.index('Supreme Court')+len('Supreme Court')

(4, 17)

In [32]:
l=['Supreme Court','Justice DY Chandrachud','Central government']
for i in l:
    st=news.index(i)
    en=st+len(i)
    print(i,st,en)

Supreme Court 4 17
Justice DY Chandrachud 296 318
Central government 332 350


In [None]:
NER ===> classification

In [None]:
X1, X2           y
[start, end,] entity_type

21, 26 source
21, 24 destination
....

## Custom NER model 2

In [1]:
import spacy
import random


#SPECIFY THE NER TRAINING DATA
TRAIN_DATA = [
        ("I have deposited an amount of $500 using my debit card.",{"entities":[(7,16,"action"),(30,34,"amount")]}),
        ("Send $500 to the merchant with account number 1234567890. ",{"entities":[(0,4,"action"),(5,9,"amount")]}),
        ("Transfer $20000 to my new bank account ending with the number 4567. ",{"entities":[(0,8,"action"),(9,15,"amount")]}),
        ("Please deposit $2000 in my account. ",{"entities":[(7,14,"action"),(15,20,"amount")]}),
        ("I would like to withdraw $10000 from my bank account. ",{"entities":[(16,24,"action"),(25,31,"amount")]})]

In [2]:
# Create balnk NLP model
nlp = spacy.blank('en') 

# Create pipe
ner_pipe = nlp.create_pipe('ner')
nlp.add_pipe(ner_pipe, last=True)

In [4]:
# Add entities labels to the ner pipeline
for text, annotations in TRAIN_DATA:
    for entity in annotations.get('entities'):
        ner_pipe.add_label(entity[2])

In [10]:
optimizer = nlp.begin_training()

for itn in range(10):
    #print("Iteration Number:" + str(itn))
    random.shuffle(TRAIN_DATA)
    losses = {}
    for text, annotations in TRAIN_DATA:
        nlp.update(
            [text],  # batch of texts
            [annotations],  # batch of annotations
            drop=0.2,# dropout - make it harder to memorise data
            sgd=optimizer,  # callable to update weights
            losses=losses)
    print("Loss:",losses['ner'])

Loss: 5.482499836982065e-10
Loss: 7.939340426233123e-12
Loss: 6.369188466744763e-11
Loss: 1.2949502417286581e-08
Loss: 1.0019092441946074e-12
Loss: 3.52677573151287e-11
Loss: 2.4727345753723725e-10
Loss: 5.077524760659583e-13
Loss: 3.206875643222707e-07
Loss: 1.1300151441587066e-13


In [11]:
#SAVE THE CUSTOM NER MODEL TO
nlp.to_disk("custom_ner_model")
print("Model saved")

Model saved


In [12]:
nlp2 = spacy.load("custom_ner_model")
doc2 = nlp2("I have withdrawn an amount of $300 with my credit card.")
for ent in doc2.ents:
    print(ent.label_, ent.text)

action withdrawn
amount $300
