In [None]:
import spacy

In [None]:
# Load the 'en_core_web_sm' pipeline and display the names of its components.
nlp = spacy.load('en_core_web_sm')
nlp.pipe_names

['tagger', 'parser', 'ner']

In [None]:
# Run the loaded pipeline on a sample sentence.
doc = nlp("Australia wants to force facebook and google to pay media companies for news")

In [None]:
# Print one line per recognized entity: text, character span, label.
for entity in doc.ents:
  fields = (entity.text, entity.start_char, entity.end_char, entity.label_)
  print(*fields)

Australia 0 9 GPE


In [None]:
# Parse a banking-style sentence and print whatever entities the pipeline finds.
sample_text = "I do not have money to pay my credit card bills"
doc = nlp(sample_text)
for entity in doc.ents:
  print(entity.text,
        entity.start_char,
        entity.end_char,
        entity.label_)

In [None]:
# Training examples as (text, {"entities": [(start, end, label), ...]}).
# `start`/`end` are character offsets into `text` with `end` exclusive, so
# text[start:end] must be exactly the annotated word(s) -- no partial words
# and no surrounding whitespace.
train = [
         # "transfer" spans 6..14 (the previous end of 13 cut off the final "r").
         ("Money transfer from my checking account is not working", {"entities":[(6,14, "ACTIVITY"), (23, 39, 'PRODUCT')]}),
         ("I want to check balance in my savings account", {"entities":[(16,23, "ACTIVITY"), (30, 45, 'PRODUCT')]}),
         ("I suspect a fraud in my credit card account", {"entities":[(12,17, "ACTIVITY"), (24, 35, 'PRODUCT')]}),
         ("I am here for opening a new savings account", {"entities":[(14,21, "ACTIVITY"), (28, 43, 'PRODUCT')]}),
         # Spelling corrected to "mortgage"; with the missing "g" restored the
         # offsets (5,13) and (20,30) line up with "mortgage" and "delinquent".
         ("Your mortgage is in delinquent status", {"entities":[(20,30, "ACTIVITY"), (5, 13, 'PRODUCT')]}),
         ("Your credit card is in past due status", {"entities":[(23,31, "ACTIVITY"), (5, 16, 'PRODUCT')]}),
         ("My loan account is still not approved not funded", {"entities":[(25,37, "ACTIVITY"), (3, 15, 'PRODUCT')]}),
         ("How do I open a new loan account", {"entities":[(9,13, "ACTIVITY"), (20, 32, 'PRODUCT')]}),
         # "Investment account" spans 24..42 (the previous end of 12 was before the start).
         ("What are the charges on Investment account", {"entities":[(13,20, "ACTIVITY"), (24, 42, 'PRODUCT')]}),
         ("Can you explain late charges on my credit card", {"entities":[(21,28, "ACTIVITY"), (35, 46, 'PRODUCT')]}),
         ("I want to open a new loan account", {"entities":[(10,14, "ACTIVITY"), (21, 33, 'PRODUCT')]}),
         ("Can you help updating payment on my credit card", {"entities":[(22,29, "ACTIVITY"), (36, 47, 'PRODUCT')]}),
         ("When is the payment due date on my card", {"entities":[(12,19, "ACTIVITY"), (35, 39, 'PRODUCT')]})
]

In [None]:
# Display the names of the pipeline's components.
nlp.pipe_names

['tagger', 'parser', 'ner']

In [None]:
# Fetch the pipeline component registered under the name 'ner'.
ner = nlp.get_pipe('ner')

In [None]:
# Register the label of every annotated span in `train` with the `ner` pipe.
for _text, annotations in train:
  entity_spans = annotations.get("entities")
  for span in entity_spans:
    label = span[2]  # spans are (start, end, label)
    ner.add_label(label)

In [None]:
# Names of every pipeline component other than 'ner'.
disable_pipes = list(filter(lambda name: name != 'ner', nlp.pipe_names))

In [None]:
import random
from spacy.util import minibatch, compounding
from pathlib import Path

# Update the pipeline on `train` while the pipes listed in `disable_pipes`
# are switched off for the duration of the `with` block.

# Fix the shuffle order so a fresh-kernel re-run visits batches in the same
# sequence. NOTE(review): this seeds only Python's `random`; any randomness
# inside spaCy itself (e.g. dropout) is not controlled here.
random.seed(0)

with nlp.disable_pipes(*disable_pipes):
  optimizer = nlp.resume_training()

  for epoch in range(100):
    random.shuffle(train)
    # nlp.update adds each batch's loss into this dict, so it is reset at the
    # start of every epoch and holds the epoch total once the batches are done.
    losses = {}

    batches = minibatch(train, size=compounding(1.0, 4.0, 1.001))

    for batch in batches:
      # Split the batch of (text, annotation) pairs into two parallel tuples.
      texts, annotations = zip(*batch)
      nlp.update(texts,
                 annotations,
                 drop=0.5,
                 losses=losses,
                 sgd=optimizer)

    # Report once per epoch. Printing after every batch showed a running sum
    # that grows within the epoch and is easy to misread as a rising loss.
    print("Losses", losses)

Losses {'ner': 6.9266517197391435}
Losses {'ner': 10.22781457258824}
Losses {'ner': 18.049810816716967}
Losses {'ner': 28.288482329668543}
Losses {'ner': 32.041417872871506}
Losses {'ner': 37.588643454396696}
Losses {'ner': 51.10917725452182}
Losses {'ner': 61.141748819095845}
Losses {'ner': 67.43537481861122}
Losses {'ner': 77.88440794003269}
Losses {'ner': 90.57671737921497}
Losses {'ner': 104.24288311851284}
Losses {'ner': 117.0183998074902}
Losses {'ner': 7.758010574267246}
Losses {'ner': 14.16094172926191}
Losses {'ner': 20.446759873556985}
Losses {'ner': 25.447113985553287}
Losses {'ner': 37.24438834023668}
Losses {'ner': 43.92789982838859}
Losses {'ner': 56.04735814614524}
Losses {'ner': 65.45842043437762}
Losses {'ner': 69.33992288332664}
Losses {'ner': 75.67586554901462}
Losses {'ner': 84.10075490917475}
Losses {'ner': 94.77419704468153}
Losses {'ner': 102.43852924902345}
Losses {'ner': 10.156483470966727}
Losses {'ner': 22.553749680092245}
Losses {'ner': 29.031014418426743}
L

In [None]:
# Run `nlp` over each training sentence and list the (text, label) of every
# entity it reports.
for sentence, _annotations in train:
  doc = nlp(sentence)
  found = []
  for entity in doc.ents:
    found.append((entity.text, entity.label_))
  print('Entities', found)

Entities [('credit card', 'PRODUCT'), ('past due', 'ACTIVITY')]
Entities [('loan account', 'PRODUCT')]
Entities [('loan account', 'PRODUCT')]
Entities [('savings account', 'PRODUCT')]
Entities [('charges', 'ACTIVITY'), ('credit card', 'PRODUCT')]
Entities [('charges', 'ACTIVITY')]
Entities [('mortage is', 'PRODUCT')]
Entities [('checking account', 'PRODUCT')]
Entities [('payment', 'ACTIVITY'), ('credit card', 'PRODUCT')]
Entities [('payment', 'ACTIVITY')]
Entities [('credit card', 'PRODUCT')]
Entities [('savings account', 'PRODUCT')]
Entities [('loan account', 'PRODUCT')]


In [None]:
from spacy import displacy

# Parse an unseen sentence, print its entities, then visualize them inline.
doc = nlp("what is the process to open a new saving account")

for ent in doc.ents:
  print(ent.text, ent.start_char, ent.end_char, ent.label_)

# Render the Doc parsed above. Calling nlp(doc.text) again would run the whole
# pipeline a second time on the same text just to rebuild the same annotations.
displacy.render(doc, style='ent', jupyter=True)

  "__main__", mod_spec)


In [None]:
# Parse another unseen sentence and print each entity's text, span and label.
query = "my credit card payment will be delayed"
doc = nlp(query)

for entity in doc.ents:
  details = [entity.text, entity.start_char, entity.end_char, entity.label_]
  print(*details)

credit card 3 14 PRODUCT
payment 15 22 ACTIVITY


In [None]:
from spacy import displacy

# Parse an unseen sentence, print its entities, then visualize them inline.
doc = nlp("what are the charges on credit card late payment in bank of america")

for ent in doc.ents:
  print(ent.text, ent.start_char, ent.end_char, ent.label_)

# Render the Doc parsed above. Calling nlp(doc.text) again would run the whole
# pipeline a second time on the same text just to rebuild the same annotations.
displacy.render(doc, style='ent', jupyter=True)

charges 13 20 ACTIVITY
payment 41 48 ACTIVITY
