<a href="https://colab.research.google.com/github/gulabpatel/NLP_Basics/blob/main/Part%202.2%3A%20Spacy_Custom_Named_Entity_Recognizer.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import spacy

In [2]:
nlp=spacy.load('en_core_web_sm')
nlp.pipe_names

['tagger', 'parser', 'ner']

In [3]:
doc = nlp("Australia wants to force Facebook and Google to pay media companies for news")

In [4]:
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

Australia 0 9 GPE
Facebook and Google 25 44 ORG


In [5]:
doc = nlp("I do not have money to pay my credit card account")

In [6]:
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

In [7]:
doc = nlp("what is the process to open a new savings account")

In [8]:
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

In [9]:
train = [
         ("Money transfer from my checking account is not working", {"entities": [(6, 13, "ACTIVITY"), (23, 39, 'PRODUCT')]}),
         ("I want to check balance in my savings account", {"entities": [(16, 23, "ACTIVITY"), (30, 45, 'PRODUCT')]}),
         ("I suspect a fraud in my credit card account", {"entities": [(12, 17, "ACTIVITY"), (24, 35, 'PRODUCT')]}),
         ("I am here for opening a new savings account", {"entities": [(14, 21, "ACTIVITY"), (28, 43, 'PRODUCT')]}),
         ("Your mortgage is in delinquent status", {"entities": [(20, 30, "ACTIVITY"), (5, 13, 'PRODUCT')]}),
         ("Your credit card is in past due status", {"entities": [(23, 31, "ACTIVITY"), (5, 16, 'PRODUCT')]}),
         ("My loan account is still not approved and funded", {"entities": [(25, 37, "ACTIVITY"), (3, 15, 'PRODUCT'), (42, 48, "ACTIVITY")]}),
         ("How do I open a new loan account", {"entities": [(9, 13, "ACTIVITY"), (20, 32, 'PRODUCT')]}),
         ("What are the charges on Investment account", {"entities": [(13, 20, "ACTIVITY"), (24, 42, 'PRODUCT')]}),
         ("Can you explain late charges on my credit card", {"entities": [(21, 28, "ACTIVITY"), (35, 46, 'PRODUCT')]}),
         ("I want to open a new loan account", {"entities": [(10, 14, "ACTIVITY"), (21, 33, 'PRODUCT')]}),
         ("Can you help updating payment on my credit card", {"entities": [(22, 29, "ACTIVITY"), (36, 47, 'PRODUCT')]}),
         ("When is the payment due date on my card", {"entities": [(12, 19, "ACTIVITY"), (35, 39, 'PRODUCT')]})
        ]

In [10]:
nlp.pipe_names

['tagger', 'parser', 'ner']

In [11]:
ner=nlp.get_pipe("ner")

In [12]:
for _, annotations in train:
  for ent in annotations.get("entities"):
      ner.add_label(ent[2])

In [13]:
disable_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']

In [None]:
import random
from spacy.util import minibatch, compounding
from pathlib import Path

with nlp.disable_pipes(*disable_pipes):
  optimizer = nlp.resume_training()

  for iteration in range(100):

    random.shuffle(train)
    losses = {}

    batches = minibatch(train, size=compounding(1.0, 4.0, 1.001))
    for batch in batches:
        text, annotation = zip(*batch)
        nlp.update(
                    text,  
                    annotation, 
                    drop=0.5, 
                    losses=losses,
                    sgd=optimizer
                )
        print("Losses", losses)

In [15]:
for text, _ in train:
    doc = nlp(text)
    print('Entities', [(ent.text, ent.label_) for ent in doc.ents])

Entities [('charges', 'ACTIVITY'), ('Investment account', 'PRODUCT')]
Entities [('mortgage', 'PRODUCT'), ('delinquent', 'ACTIVITY')]
Entities [('charges', 'ACTIVITY'), ('credit card', 'PRODUCT')]
Entities [('loan account', 'PRODUCT'), ('not approved', 'ACTIVITY'), ('funded', 'ACTIVITY')]
Entities [('open', 'ACTIVITY'), ('loan account', 'PRODUCT')]
Entities [('checking account', 'PRODUCT')]
Entities [('balance', 'ACTIVITY'), ('savings account', 'PRODUCT')]
Entities [('payment', 'ACTIVITY'), ('credit card', 'PRODUCT')]
Entities [('payment', 'ACTIVITY'), ('card', 'PRODUCT')]
Entities [('credit card', 'PRODUCT'), ('past', 'ACTIVITY')]
Entities [('fraud', 'ACTIVITY'), ('credit card', 'PRODUCT')]
Entities [('open', 'ACTIVITY'), ('loan account', 'PRODUCT')]
Entities [('savings account', 'PRODUCT')]


In [16]:
from spacy import displacy

doc = nlp("what is the process to open a new savings account")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)
displacy.render(nlp(doc.text),style='ent', jupyter=True)    

open 23 27 ACTIVITY
savings account 34 49 PRODUCT


In [17]:
doc = nlp("My credit card payment will be delayed")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

credit card 3 14 PRODUCT
payment 15 22 ACTIVITY


In [18]:
doc = nlp("what are the charges on credit card late payment in Bank of America")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)
displacy.render(nlp(doc.text),style='ent', jupyter=True)

charges 13 20 ACTIVITY
credit card 24 35 PRODUCT
payment 41 48 ACTIVITY


In [19]:
doc = nlp("I lost my investment account password and cannot open my account now")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)
displacy.render(nlp(doc.text),style='ent', jupyter=True)

investment account 10 28 PRODUCT
open 49 53 ACTIVITY
account now 57 68 PRODUCT


In [20]:
doc = nlp("what is the status of my loan account")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

loan account 25 37 PRODUCT


https://explosion.ai/blog/pseudo-rehearsal-catastrophic-forgetting

In [21]:
doc = nlp("Australia wants to force Facebook and Google to pay media companies for news")
for ent in doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_)

------------------------------------------------