In [3]:
# Import the small English model en_core_web_sm (vocabulary, syntax & entities)
import en_core_web_sm   

# Load the small English pipeline (vocabulary, syntax & entities).
nlp = en_core_web_sm.load()

# Calling the "nlp" object on a text returns a document carrying the
# linguistic annotations.
doc = nlp('what is the price of mcspicy chicken?')

# Pair each entity span the stock model recognised with its label; the
# recorded output for this sentence is an empty list.
entities = [
    (span, span.label_)
    for span in doc.ents
]

print(entities)

[]


In [4]:
# import the required libraries
import spacy
import random

In [8]:
# Initial parameters
iterations = 25    # number of passes over the training data
model_file = None  # name of an existing model to load; otherwise leave it as None

In [6]:
# Training data: one price question per product. Every question shares the
# 21-character prefix 'what is the price of ', so each product name spans the
# characters [21, 21 + len(name)) and is labelled 'FoodProduct'.
TRAINING_DATA = [
    (f'what is the price of {product}?',
     {'entities': [(21, 21 + len(product), 'FoodProduct')]})
    for product in ('McVeggie', 'McEgg', 'McChicken',
                    'McSpicy Paneer', 'McSpicy Chicken')
]

In [9]:
# Sentence used later to try out the trained model
test_sample = 'what is the price of McAloo?'

# Create NLP model: start from a blank English pipeline unless an existing
# model was configured in model_file, in which case that model is loaded.
if model_file is None:
    nlp = spacy.blank('en')
    print("Created blank NLP model")
else:
    nlp = spacy.load(model_file)
    print("Load Existing NER Model ", model_file)

Created blank NLP model


In [11]:
# Create NLP Pipeline
# Keep a handle to the NER component in BOTH branches: the label-registration
# cell that follows calls ner_pipe.add_label(), so ner_pipe must be bound even
# when the component has only just been added (add_pipe returns the component
# it creates). Previously a fresh blank pipeline left ner_pipe undefined.
if 'ner' not in nlp.pipe_names:
    ner_pipe = nlp.add_pipe('ner')
else:
    ner_pipe = nlp.get_pipe('ner')

In [12]:
# Register every entity label that appears in the training annotations with
# the NER component. Each annotation is a (start, end, label) triple.
for text, annotations in TRAINING_DATA:
    for _start, _end, label in annotations.get('entities'):
        ner_pipe.add_label(label)

In [29]:
from spacy.training import Example

# Train NER model

# Seed Python's and the numeric backends' RNGs so the shuffle order and the
# weight initialisation are the same on every Restart & Run All.
spacy.util.fix_random_seed(0)

# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']

# Build the Example objects once (they do not change between epochs) and
# shuffle this local list, so the shared TRAINING_DATA is never reordered.
examples = [
    Example.from_dict(nlp.make_doc(text), annotations)
    for text, annotations in TRAINING_DATA
]

with nlp.select_pipes(disable=other_pipes):  # only train NER
    # A blank pipeline has to be initialised; a pipeline loaded from model_file
    # must resume instead, because initialising would discard its weights.
    if model_file is None:
        optimizer = nlp.initialize()
    else:
        optimizer = nlp.resume_training()

    for itn in range(iterations):
        print(f"Iteration Number:{itn}")
        random.shuffle(examples)
        losses = {}  # accumulated by nlp.update over this epoch
        for example in examples:
            nlp.update(
                [example],         # batch containing a single example
                drop=0.2,          # dropout - make it harder to memorise data
                sgd=optimizer,     # callable to update weights
                losses=losses,
            )
        print("Loss:", losses['ner'])

Iteration Number:0
Loss: 26.925736516714096
Iteration Number:1
Loss: 10.360803883813787
Iteration Number:2
Loss: 7.664226388217872
Iteration Number:3
Loss: 7.554177849869799
Iteration Number:4
Loss: 3.5146913069742363
Iteration Number:5
Loss: 2.235378855385953
Iteration Number:6
Loss: 1.4391341406213742
Iteration Number:7
Loss: 3.370807148093064
Iteration Number:8
Loss: 3.39401508519824
Iteration Number:9
Loss: 1.5366774841387107
Iteration Number:10
Loss: 0.016965338063723398
Iteration Number:11
Loss: 0.0004255714508232036
Iteration Number:12
Loss: 0.0039022217737479364
Iteration Number:13
Loss: 0.00010787500000543969
Iteration Number:14
Loss: 1.5120197009993195e-06
Iteration Number:15
Loss: 1.3886151551261622e-05
Iteration Number:16
Loss: 5.66273864695841e-07
Iteration Number:17
Loss: 1.908502045253412e-06
Iteration Number:18
Loss: 2.8461840056911967e-07
Iteration Number:19
Loss: 8.033790172864669e-08
Iteration Number:20
Loss: 1.1663018478796077e-05
Iteration Number:21
Loss: 4.5703654

In [30]:
# Display the sentence the trained model is about to be tried on; its product
# name ("McAloo") does not appear in the FoodProduct training sentences.
test_sample

'what is the price of McAloo?'

In [31]:
# Run the trained pipeline on the test sentence and list each recognised
# entity span together with its label.
doc = nlp(test_sample)
entities = [
    (span, span.label_)
    for span in doc.ents
]
print(entities)

[(McAloo, 'FoodProduct')]


In [32]:
# Save the trained pipeline to a directory on disk
model_file = "ner_model"
nlp.to_disk(model_file)

# Test the model: for every entity found in the test sentence, print its
# text, start/end character offsets and label.
test_document = nlp(test_sample)
for ent in test_document.ents:
    print(f"{ent.text} {ent.start_char} {ent.end_char} {ent.label_}")

McAloo 21 27 FoodProduct


## Getting the start and end indices of entities

In [33]:
# Sample two-paragraph news text used below to look up the character offsets
# of a few phrases.
news="""The Supreme Court on Sunday issued a slew of directions to the Central and state governments on the COVID-19 situation and directed that no patient shall be denied hospitalisation or essential drugs in any State or Union Territory for lack of local residential or identity proof.
Bench headed by Justice DY Chandrachud directed the Central government to formulate a national policy on admissions to hospitals, within two weeks, which shall be followed by all state governments and till then no patients will be denied admission or essential drugs in absence of local residential or identity proof."""

In [34]:
# Find each phrase in the news text and print it with its start offset and
# end offset (start + length, i.e. end-exclusive), the same (start, end)
# convention the training annotations in this notebook use.
phrases = ['Supreme Court', 'Justice DY Chandrachud', 'Central government']
for phrase in phrases:
    start = news.index(phrase)  # first occurrence; raises ValueError if absent
    end = start + len(phrase)
    print(f"{phrase} {start} {end}")

Supreme Court 4 17
Justice DY Chandrachud 296 318
Central government 332 350


## Example - 2

In [38]:
# Specify the NER training data: each sample is (text, {"entities": [...]}),
# where every entity is a (start_char, end_char, label) triple with an
# end-exclusive end offset. Two labels are used: "action" and "amount".
TRAINING_DATA = [
    (
        "I have deposited an amount of $500 using my debit card.",
        {"entities": [(7, 16, "action"), (30, 34, "amount")]},
    ),
    (
        "Send $500 to the merchant with account number 1234567890. ",
        {"entities": [(0, 4, "action"), (5, 9, "amount")]},
    ),
    (
        "Transfer $20000 to my new bank account ending with the number 4567. ",
        {"entities": [(0, 8, "action"), (9, 15, "amount")]},
    ),
    (
        "Please deposit $2000 in my account. ",
        {"entities": [(7, 14, "action"), (15, 20, "amount")]},
    ),
    (
        "I would like to withdraw $10000 from my bank account. ",
        {"entities": [(16, 24, "action"), (25, 31, "amount")]},
    ),
]

In [39]:
# Rebind nlp to a new blank English pipeline for this example; the pipeline
# trained on the FoodProduct data is no longer referenced by this name.
nlp = spacy.blank('en') 

In [40]:
# Create NLP Pipeline: reuse the NER component when the pipeline already has
# one, otherwise add it; either way ner_pipe ends up bound to the component.
ner_pipe = nlp.get_pipe('ner') if 'ner' in nlp.pipe_names else nlp.add_pipe('ner')

In [41]:
# Register every entity label that appears in the training annotations with
# the NER component. Each annotation is a (start, end, label) triple.
for text, annotations in TRAINING_DATA:
    for _start, _end, label in annotations.get('entities'):
        ner_pipe.add_label(label)

In [42]:
from spacy.training import Example

# Train NER model

# Seed Python's and the numeric backends' RNGs so the shuffle order and the
# weight initialisation are the same on every Restart & Run All.
spacy.util.fix_random_seed(0)

# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']

# Build the Example objects once (they do not change between epochs) and
# shuffle this local list, so the shared TRAINING_DATA is never reordered.
examples = [
    Example.from_dict(nlp.make_doc(text), annotations)
    for text, annotations in TRAINING_DATA
]

with nlp.select_pipes(disable=other_pipes):  # only train NER
    # nlp is a freshly created blank pipeline in this example, so its
    # components are initialised from scratch here.
    optimizer = nlp.initialize()

    for itn in range(iterations):
        print(f"Iteration Number:{itn}")
        random.shuffle(examples)
        losses = {}  # accumulated by nlp.update over this epoch
        for example in examples:
            nlp.update(
                [example],         # batch containing a single example
                drop=0.2,          # dropout - make it harder to memorise data
                sgd=optimizer,     # callable to update weights
                losses=losses,
            )
        print("Loss:", losses['ner'])

Iteration Number:0
Loss: 49.097098767757416
Iteration Number:1
Loss: 35.938131004571915
Iteration Number:2
Loss: 19.31037644483149
Iteration Number:3
Loss: 13.757648970356968
Iteration Number:4
Loss: 23.97635610733414
Iteration Number:5
Loss: 12.094797844431014
Iteration Number:6
Loss: 6.334138346937834
Iteration Number:7
Loss: 2.613179844462138
Iteration Number:8
Loss: 0.584887942298856
Iteration Number:9
Loss: 0.43555951489860917
Iteration Number:10
Loss: 0.0025288696021790224
Iteration Number:11
Loss: 5.3008145413680556e-06
Iteration Number:12
Loss: 3.5749756995299323e-06
Iteration Number:13
Loss: 0.4051954085717397
Iteration Number:14
Loss: 6.736322997678397e-07
Iteration Number:15
Loss: 2.9925313665385765e-05
Iteration Number:16
Loss: 3.9259603301861517e-05
Iteration Number:17
Loss: 8.611520746262742e-05
Iteration Number:18
Loss: 3.8625380422768737e-07
Iteration Number:19
Loss: 1.962152189390658e-08
Iteration Number:20
Loss: 3.822269094814336e-07
Iteration Number:21
Loss: 2.264737

In [44]:
# Try the trained pipeline on a sentence that is not in the training data and
# print the label followed by the text of every entity it finds.
doc2 = nlp("I have withdrawn an amount of $300 with my credit card.")
for ent in doc2.ents:
    print(f"{ent.label_} {ent.text}")

action withdrawn
amount $300
