In [1]:
import spacy

nlp = spacy.load('en_core_web_md')

In [2]:
chat_text = ('Hi my name is Donald Duck, I am American, and my bank account 32345128 with sort code 12-15-18. \
                My VISA is 4444333322221111 how do I get access to online banking? \
                I think I have $325 on my account when I checked on 10-05-2011 \
                I have 2 accounts with you. \
                You can reach me on phone 01234 000 001 or donaldduck@gmail.com. \
                Ohh and my address is 1313 Webfoot Walk, Duckburg, Calisota')
chat_test_doc = nlp(chat_text)
for ent in chat_test_doc.ents:
    print(ent.text, ent.start_char, ent.end_char, ent.label_, spacy.explain(ent.label_))

Donald Duck 14 25 PERSON People, including fictional
American 32 40 NORP Nationalities or religious or political groups
32345128 62 70 CARDINAL Numerals that do not fall under another type
12 86 88 CARDINAL Numerals that do not fall under another type
VISA 115 119 ORG Companies, agencies, institutions, etc.
4444333322221111 123 139 DATE Absolute or relative dates or periods
325 211 214 MONEY Monetary values, including unit
10-05-2011 247 257 DATE Absolute or relative dates or periods
2 281 282 CARDINAL Numerals that do not fall under another type
Webfoot Walk 426 438 FAC Buildings, airports, highways, bridges, etc.
Duckburg 440 448 GPE Countries, cities, states
Calisota 450 458 GPE Countries, cities, states


In [3]:
from spacy import displacy

displacy.serve(chat_test_doc, style='ent')

  "__main__", mod_spec)



Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.


### Let's create a Custom Named Entity 

#### Capture Email, Phone, Bank Account and Sort Code

In [4]:
data1 = 'My bank account is 09876543 with sort code 11-01-45.'
    
data2 = 'You can reach me on my email abc@hotmail.co.uk or call me on 07726 000 123'

data3 = 'Bank Account 09780909, Sort Code 12-34-56'

data4 = 'Contact me on mail mickeymouse@yahoo.com or 01480 345 345'

data5 = 'Hi bank people. Need help with my account, account is 45674567, call me on 01345 567567'

data6 = 'Can you help me with my sort 34-23-12 and contact me using xyz@googlemail.dk'

data7 = 'I live here in the UK and need help with accessing my account. Bank acc is 09876543, 13-20-54. Let me know on 1290 344 456 or helpme@outlook.com'

data8 = 'Sort Code 33-34-35, Bank Account 33445566'

data9 = 'Email: Guffy@yahoo.com; phone: 01346 022 773'

data10 = 'Help, my sort code is 22-23-71, contact me using Daisy@googlemail.co.uk, ohh account no is 44435987'



In [5]:
TRAIN_DATA = [
    (data1, {
        'entities': [(19, 27, 'BANK_ACCOUNT'), (43, 51, 'SORT_CODE')]
    }),
     (data2, {
         'entities': [(29, 46, 'EMAIL'), (61, 74, 'PHONE')]
    }),
     (data3, {
         'entities': [(13, 21, 'BANK_ACCOUNT'), (33, 41, 'SORT_CODE')]
    }),
     (data4, {
         'entities': [(19, 40, 'EMAIL'), (44, 57, 'PHONE')]
    }),
     (data5, {
         'entities': [(54, 62, 'BANK_ACCOUNT'), (75, 87, 'PHONE')]
    }),
     (data6, {
         'entities': [(29, 37, 'SORT_CODE'), (59, 76, 'EMAIL')]
    }),
     (data7, {
         'entities': [(75, 83, 'BANK_ACCOUNT'), (85, 93, 'SORT_CODE'), (110, 122, 'PHONE'), (126, 144, 'EMAIL')]
    }),
     (data8, {
         'entities': [(10, 18, 'SORT_CODE'), (33, 41, 'BANK_ACCOUNT')]
    }),
     (data9, {
         'entities': [(7, 22, 'EMAIL'), (31, 44, 'PHONE')]
    }),
     (data10, {
         'entities': [(22, 30, 'SORT_CODE'), (49, 71, 'EMAIL'), (91, 99, 'BANK_ACCOUNT')]
    }),

]

In [6]:
import random
from tqdm import tqdm

ner = nlp.get_pipe('ner')

n_iter=200
# add labels, Trains data based on annotations 
for _, annotations in TRAIN_DATA:
    for ent in annotations.get('entities'):
        print(ent[2])
        ner.add_label(ent[2])

# get names of other pipes to disable them during training
other_pipes = [pipe for pipe in nlp.pipe_names if pipe != 'ner']
with nlp.disable_pipes(*other_pipes):  # only train NER
    optimizer = nlp.begin_training()
    for itn in range(n_iter):
        random.shuffle(TRAIN_DATA)
        losses = {}
        for text, annotations in tqdm(TRAIN_DATA):
            nlp.update(
                [text],  # batch of texts
                [annotations],  # batch of annotations
                drop=0.5,  # dropout 
                sgd=optimizer,  # callable to update weights
                losses=losses)
        print(losses)

BANK_ACCOUNT
SORT_CODE
EMAIL
PHONE
BANK_ACCOUNT
SORT_CODE
EMAIL
PHONE
BANK_ACCOUNT
PHONE
SORT_CODE
EMAIL
BANK_ACCOUNT
SORT_CODE
PHONE
EMAIL
SORT_CODE
BANK_ACCOUNT
EMAIL
PHONE
SORT_CODE
EMAIL
BANK_ACCOUNT


100%|██████████| 10/10 [00:01<00:00,  8.96it/s]
 10%|█         | 1/10 [00:00<00:01,  7.58it/s]

{'ner': 180.19079176992125}


100%|██████████| 10/10 [00:01<00:00,  8.73it/s]
 10%|█         | 1/10 [00:00<00:00,  9.35it/s]

{'ner': 161.62054649833746}


100%|██████████| 10/10 [00:01<00:00,  8.99it/s]
 10%|█         | 1/10 [00:00<00:00,  9.80it/s]

{'ner': 143.50688469600294}


100%|██████████| 10/10 [00:01<00:00,  8.98it/s]
 10%|█         | 1/10 [00:00<00:00,  9.90it/s]

{'ner': 134.85134443425562}


100%|██████████| 10/10 [00:01<00:00,  8.93it/s]
 10%|█         | 1/10 [00:00<00:01,  8.85it/s]

{'ner': 155.95017497154186}


100%|██████████| 10/10 [00:01<00:00,  8.93it/s]
 10%|█         | 1/10 [00:00<00:00,  9.62it/s]

{'ner': 139.53863512125827}


100%|██████████| 10/10 [00:01<00:00,  8.66it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 142.61962518654764}


100%|██████████| 10/10 [00:01<00:00,  8.70it/s]
 10%|█         | 1/10 [00:00<00:01,  6.83it/s]

{'ner': 140.81224481885147}


100%|██████████| 10/10 [00:01<00:00,  8.47it/s]
 10%|█         | 1/10 [00:00<00:00,  9.26it/s]

{'ner': 128.19420933019137}


100%|██████████| 10/10 [00:01<00:00,  8.28it/s]
 10%|█         | 1/10 [00:00<00:00,  9.52it/s]

{'ner': 132.04527877485816}


100%|██████████| 10/10 [00:01<00:00,  8.33it/s]
 10%|█         | 1/10 [00:00<00:01,  8.00it/s]

{'ner': 129.11857277248055}


100%|██████████| 10/10 [00:01<00:00,  8.21it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 125.7192676588893}


100%|██████████| 10/10 [00:01<00:00,  8.84it/s]
 10%|█         | 1/10 [00:00<00:01,  7.85it/s]

{'ner': 129.90232367487624}


100%|██████████| 10/10 [00:01<00:00,  8.66it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 127.87291296664625}


100%|██████████| 10/10 [00:01<00:00,  8.81it/s]
 10%|█         | 1/10 [00:00<00:01,  8.06it/s]

{'ner': 134.24521146295592}


100%|██████████| 10/10 [00:01<00:00,  8.79it/s]
 10%|█         | 1/10 [00:00<00:01,  8.70it/s]

{'ner': 126.43459716680809}


100%|██████████| 10/10 [00:01<00:00,  8.72it/s]
 10%|█         | 1/10 [00:00<00:01,  8.70it/s]

{'ner': 130.61528441496193}


100%|██████████| 10/10 [00:01<00:00,  8.82it/s]
 10%|█         | 1/10 [00:00<00:00,  9.17it/s]

{'ner': 124.31460718438029}


100%|██████████| 10/10 [00:01<00:00,  8.42it/s]
 10%|█         | 1/10 [00:00<00:01,  6.89it/s]

{'ner': 117.49026206810959}


100%|██████████| 10/10 [00:01<00:00,  8.78it/s]
 10%|█         | 1/10 [00:00<00:00,  9.26it/s]

{'ner': 126.96769812483399}


100%|██████████| 10/10 [00:01<00:00,  8.62it/s]
 10%|█         | 1/10 [00:00<00:00,  9.62it/s]

{'ner': 118.66182941140141}


100%|██████████| 10/10 [00:01<00:00,  9.01it/s]
 10%|█         | 1/10 [00:00<00:00,  9.01it/s]

{'ner': 131.31484260736033}


100%|██████████| 10/10 [00:01<00:00,  8.60it/s]
 10%|█         | 1/10 [00:00<00:01,  8.26it/s]

{'ner': 125.61120177990233}


100%|██████████| 10/10 [00:01<00:00,  8.83it/s]
 10%|█         | 1/10 [00:00<00:00,  9.52it/s]

{'ner': 124.94044966855654}


100%|██████████| 10/10 [00:01<00:00,  8.25it/s]
 10%|█         | 1/10 [00:00<00:01,  8.20it/s]

{'ner': 130.295160634676}


100%|██████████| 10/10 [00:01<00:00,  8.44it/s]
 10%|█         | 1/10 [00:00<00:01,  8.28it/s]

{'ner': 131.60723632294685}


100%|██████████| 10/10 [00:01<00:00,  8.96it/s]
 10%|█         | 1/10 [00:00<00:01,  8.40it/s]

{'ner': 116.6619615983218}


100%|██████████| 10/10 [00:01<00:00,  8.72it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 126.42757760488894}


100%|██████████| 10/10 [00:01<00:00,  8.96it/s]
 10%|█         | 1/10 [00:00<00:01,  8.85it/s]

{'ner': 124.15652387362206}


100%|██████████| 10/10 [00:01<00:00,  8.82it/s]
 10%|█         | 1/10 [00:00<00:00,  9.80it/s]

{'ner': 139.17422819836065}


100%|██████████| 10/10 [00:01<00:00,  8.69it/s]
 10%|█         | 1/10 [00:00<00:00,  9.35it/s]

{'ner': 113.6427643254865}


100%|██████████| 10/10 [00:01<00:00,  8.65it/s]
 10%|█         | 1/10 [00:00<00:01,  8.86it/s]

{'ner': 119.21563973551383}


100%|██████████| 10/10 [00:01<00:00,  8.73it/s]
 10%|█         | 1/10 [00:00<00:01,  8.85it/s]

{'ner': 127.44081011610979}


100%|██████████| 10/10 [00:01<00:00,  8.33it/s]
 10%|█         | 1/10 [00:00<00:00,  9.35it/s]

{'ner': 115.92890261756838}


100%|██████████| 10/10 [00:01<00:00,  8.62it/s]
 10%|█         | 1/10 [00:00<00:00,  9.09it/s]

{'ner': 135.1948231161805}


100%|██████████| 10/10 [00:01<00:00,  8.43it/s]
 10%|█         | 1/10 [00:00<00:01,  8.33it/s]

{'ner': 122.91226010513492}


100%|██████████| 10/10 [00:01<00:00,  8.47it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 139.7247038728092}


100%|██████████| 10/10 [00:01<00:00,  8.92it/s]
 10%|█         | 1/10 [00:00<00:00,  9.43it/s]

{'ner': 128.6136598335579}


100%|██████████| 10/10 [00:01<00:00,  7.85it/s]
 10%|█         | 1/10 [00:00<00:01,  7.81it/s]

{'ner': 118.66768923262134}


100%|██████████| 10/10 [00:01<00:00,  8.53it/s]
 10%|█         | 1/10 [00:00<00:00,  9.52it/s]

{'ner': 121.02352613286348}


100%|██████████| 10/10 [00:01<00:00,  8.47it/s]
 10%|█         | 1/10 [00:00<00:00,  9.71it/s]

{'ner': 119.23120404907968}


100%|██████████| 10/10 [00:01<00:00,  8.45it/s]
 10%|█         | 1/10 [00:00<00:00,  9.26it/s]

{'ner': 127.64931121177506}


100%|██████████| 10/10 [00:01<00:00,  8.94it/s]
 10%|█         | 1/10 [00:00<00:01,  8.32it/s]

{'ner': 128.73170921346173}


100%|██████████| 10/10 [00:01<00:00,  8.95it/s]
 10%|█         | 1/10 [00:00<00:00,  9.16it/s]

{'ner': 130.00508262292624}


100%|██████████| 10/10 [00:01<00:00,  8.44it/s]
 10%|█         | 1/10 [00:00<00:00,  9.62it/s]

{'ner': 121.7110794770997}


100%|██████████| 10/10 [00:01<00:00,  8.74it/s]
 10%|█         | 1/10 [00:00<00:00,  9.43it/s]

{'ner': 128.3412255455478}


100%|██████████| 10/10 [00:01<00:00,  8.96it/s]
 10%|█         | 1/10 [00:00<00:00,  9.43it/s]

{'ner': 127.36472928000148}


100%|██████████| 10/10 [00:01<00:00,  8.81it/s]
 10%|█         | 1/10 [00:00<00:01,  8.13it/s]

{'ner': 132.287021256765}


100%|██████████| 10/10 [00:01<00:00,  8.50it/s]
 10%|█         | 1/10 [00:00<00:00,  9.26it/s]

{'ner': 124.4482329300954}


100%|██████████| 10/10 [00:01<00:00,  8.85it/s]
 10%|█         | 1/10 [00:00<00:00,  9.90it/s]

{'ner': 117.91419947966642}


100%|██████████| 10/10 [00:01<00:00,  8.85it/s]
 10%|█         | 1/10 [00:00<00:00,  9.94it/s]

{'ner': 122.53317221510224}


100%|██████████| 10/10 [00:01<00:00,  8.71it/s]
 10%|█         | 1/10 [00:00<00:01,  7.94it/s]

{'ner': 119.47855233237351}


100%|██████████| 10/10 [00:01<00:00,  7.96it/s]
 10%|█         | 1/10 [00:00<00:00,  9.62it/s]

{'ner': 126.17944460478611}


100%|██████████| 10/10 [00:01<00:00,  8.63it/s]
 10%|█         | 1/10 [00:00<00:01,  8.70it/s]

{'ner': 131.32996161744813}


100%|██████████| 10/10 [00:01<00:00,  8.63it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 121.10184422368184}


100%|██████████| 10/10 [00:01<00:00,  9.03it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 132.6908452779753}


100%|██████████| 10/10 [00:01<00:00,  9.03it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 119.99742430972401}


100%|██████████| 10/10 [00:01<00:00,  9.04it/s]
 10%|█         | 1/10 [00:00<00:01,  8.36it/s]

{'ner': 114.27709241697448}


100%|██████████| 10/10 [00:01<00:00,  9.08it/s]
 20%|██        | 2/10 [00:00<00:00, 10.26it/s]

{'ner': 126.66843414574396}


100%|██████████| 10/10 [00:01<00:00,  9.17it/s]
 10%|█         | 1/10 [00:00<00:00,  9.09it/s]

{'ner': 123.24906320909213}


100%|██████████| 10/10 [00:01<00:00,  9.30it/s]
 10%|█         | 1/10 [00:00<00:00,  9.35it/s]

{'ner': 128.68681004655082}


100%|██████████| 10/10 [00:01<00:00,  9.05it/s]
 10%|█         | 1/10 [00:00<00:01,  8.40it/s]

{'ner': 125.94724466349362}


100%|██████████| 10/10 [00:01<00:00,  8.60it/s]
 10%|█         | 1/10 [00:00<00:01,  7.87it/s]

{'ner': 117.13751935222535}


100%|██████████| 10/10 [00:01<00:00,  9.02it/s]
 10%|█         | 1/10 [00:00<00:00,  9.80it/s]

{'ner': 118.85416782340326}


100%|██████████| 10/10 [00:01<00:00,  8.92it/s]
 10%|█         | 1/10 [00:00<00:00,  9.26it/s]

{'ner': 118.036973446724}


100%|██████████| 10/10 [00:01<00:00,  9.09it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 121.38005695259199}


100%|██████████| 10/10 [00:01<00:00,  8.17it/s]
 10%|█         | 1/10 [00:00<00:00,  9.26it/s]

{'ner': 116.08989332359124}


100%|██████████| 10/10 [00:01<00:00,  8.65it/s]
 10%|█         | 1/10 [00:00<00:00,  9.17it/s]

{'ner': 125.66812393462169}


100%|██████████| 10/10 [00:01<00:00,  8.79it/s]
 10%|█         | 1/10 [00:00<00:00,  9.71it/s]

{'ner': 133.4693199543981}


100%|██████████| 10/10 [00:01<00:00,  8.80it/s]
 10%|█         | 1/10 [00:00<00:01,  8.00it/s]

{'ner': 128.89055010594893}


100%|██████████| 10/10 [00:01<00:00,  9.07it/s]
 10%|█         | 1/10 [00:00<00:01,  8.62it/s]

{'ner': 116.5183457119274}


100%|██████████| 10/10 [00:01<00:00,  8.61it/s]
 10%|█         | 1/10 [00:00<00:01,  8.76it/s]

{'ner': 122.96127140108729}


100%|██████████| 10/10 [00:01<00:00,  9.14it/s]
 10%|█         | 1/10 [00:00<00:00,  9.09it/s]

{'ner': 117.39162179173809}


100%|██████████| 10/10 [00:01<00:00,  8.72it/s]
 10%|█         | 1/10 [00:00<00:00,  9.26it/s]

{'ner': 120.09751915344623}


100%|██████████| 10/10 [00:01<00:00,  8.92it/s]
 10%|█         | 1/10 [00:00<00:01,  8.55it/s]

{'ner': 115.24871879525017}


100%|██████████| 10/10 [00:01<00:00,  9.19it/s]
 10%|█         | 1/10 [00:00<00:00,  9.35it/s]

{'ner': 125.24243381281849}


100%|██████████| 10/10 [00:01<00:00,  9.41it/s]
 10%|█         | 1/10 [00:00<00:00,  9.62it/s]

{'ner': 125.39726616365078}


100%|██████████| 10/10 [00:01<00:00,  8.65it/s]
 10%|█         | 1/10 [00:00<00:01,  7.41it/s]

{'ner': 129.0770548612345}


100%|██████████| 10/10 [00:01<00:00,  9.03it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 128.6144917752972}


100%|██████████| 10/10 [00:01<00:00,  9.04it/s]
 10%|█         | 1/10 [00:00<00:00,  9.71it/s]

{'ner': 119.63321937848741}


100%|██████████| 10/10 [00:01<00:00,  8.29it/s]
 10%|█         | 1/10 [00:00<00:01,  8.40it/s]

{'ner': 117.16334246949907}


100%|██████████| 10/10 [00:01<00:00,  8.72it/s]
 10%|█         | 1/10 [00:00<00:01,  8.85it/s]

{'ner': 112.92914314232712}


100%|██████████| 10/10 [00:01<00:00,  9.02it/s]
 20%|██        | 2/10 [00:00<00:00, 10.42it/s]

{'ner': 127.8310776268554}


100%|██████████| 10/10 [00:01<00:00,  8.92it/s]
 20%|██        | 2/10 [00:00<00:00, 10.53it/s]

{'ner': 122.20647187624218}


100%|██████████| 10/10 [00:01<00:00,  8.81it/s]
 10%|█         | 1/10 [00:00<00:00,  9.71it/s]

{'ner': 118.29851282623929}


100%|██████████| 10/10 [00:01<00:00,  9.04it/s]
 10%|█         | 1/10 [00:00<00:01,  8.06it/s]

{'ner': 117.19648386000699}


100%|██████████| 10/10 [00:01<00:00,  9.06it/s]
 10%|█         | 1/10 [00:00<00:01,  8.26it/s]

{'ner': 134.85713061941715}


100%|██████████| 10/10 [00:01<00:00,  8.73it/s]
 10%|█         | 1/10 [00:00<00:01,  8.77it/s]

{'ner': 125.62234556811745}


100%|██████████| 10/10 [00:01<00:00,  8.97it/s]
 10%|█         | 1/10 [00:00<00:00,  9.25it/s]

{'ner': 125.26464417611714}


100%|██████████| 10/10 [00:01<00:00,  8.73it/s]
 10%|█         | 1/10 [00:00<00:00,  9.80it/s]

{'ner': 122.99320230966259}


100%|██████████| 10/10 [00:01<00:00,  8.89it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 120.7366266852332}


100%|██████████| 10/10 [00:01<00:00,  8.79it/s]
 10%|█         | 1/10 [00:00<00:01,  7.94it/s]

{'ner': 118.50004568617442}


100%|██████████| 10/10 [00:01<00:00,  8.93it/s]
 10%|█         | 1/10 [00:00<00:00,  9.35it/s]

{'ner': 114.26631218910916}


100%|██████████| 10/10 [00:01<00:00,  8.89it/s]
 10%|█         | 1/10 [00:00<00:01,  8.93it/s]

{'ner': 113.82783105352428}


100%|██████████| 10/10 [00:01<00:00,  8.56it/s]
 10%|█         | 1/10 [00:00<00:01,  8.62it/s]

{'ner': 117.83319172652}


100%|██████████| 10/10 [00:01<00:00,  8.52it/s]
 10%|█         | 1/10 [00:00<00:01,  7.46it/s]

{'ner': 111.4186153318442}


100%|██████████| 10/10 [00:01<00:00,  8.73it/s]
 10%|█         | 1/10 [00:00<00:01,  8.13it/s]

{'ner': 107.10372764134445}


100%|██████████| 10/10 [00:01<00:00,  8.55it/s]
 10%|█         | 1/10 [00:00<00:01,  8.55it/s]

{'ner': 129.24199279100867}


100%|██████████| 10/10 [00:01<00:00,  8.39it/s]
 10%|█         | 1/10 [00:00<00:01,  8.33it/s]

{'ner': 115.57123450486688}


100%|██████████| 10/10 [00:01<00:00,  8.55it/s]
 10%|█         | 1/10 [00:00<00:00,  9.80it/s]

{'ner': 124.6333446779754}


100%|██████████| 10/10 [00:01<00:00,  8.87it/s]
 10%|█         | 1/10 [00:00<00:00, 10.00it/s]

{'ner': 112.79702707739762}


100%|██████████| 10/10 [00:01<00:00,  8.84it/s]
 10%|█         | 1/10 [00:00<00:01,  7.48it/s]

{'ner': 114.69318732767192}


100%|██████████| 10/10 [00:01<00:00,  8.59it/s]
 10%|█         | 1/10 [00:00<00:01,  8.70it/s]

{'ner': 112.55233155815404}


100%|██████████| 10/10 [00:01<00:00,  8.50it/s]
 10%|█         | 1/10 [00:00<00:01,  8.55it/s]

{'ner': 109.63693534756021}


100%|██████████| 10/10 [00:01<00:00,  8.77it/s]
 10%|█         | 1/10 [00:00<00:00,  9.62it/s]

{'ner': 108.96443921778882}


100%|██████████| 10/10 [00:01<00:00,  8.60it/s]
 10%|█         | 1/10 [00:00<00:01,  6.33it/s]

{'ner': 116.21020082042378}


100%|██████████| 10/10 [00:01<00:00,  8.15it/s]
 10%|█         | 1/10 [00:00<00:01,  8.40it/s]

{'ner': 126.6494964963058}


100%|██████████| 10/10 [00:01<00:00,  8.82it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 115.21128343879809}


100%|██████████| 10/10 [00:01<00:00,  8.01it/s]
 10%|█         | 1/10 [00:00<00:01,  7.46it/s]

{'ner': 111.88465258467477}


100%|██████████| 10/10 [00:01<00:00,  8.51it/s]
 10%|█         | 1/10 [00:00<00:00,  9.18it/s]

{'ner': 119.86301336972247}


100%|██████████| 10/10 [00:01<00:00,  8.70it/s]
 10%|█         | 1/10 [00:00<00:01,  8.70it/s]

{'ner': 121.32779276318615}


100%|██████████| 10/10 [00:01<00:00,  8.53it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 116.87023906385002}


100%|██████████| 10/10 [00:01<00:00,  8.54it/s]
 10%|█         | 1/10 [00:00<00:01,  8.00it/s]

{'ner': 118.03861466338742}


100%|██████████| 10/10 [00:01<00:00,  8.13it/s]
 10%|█         | 1/10 [00:00<00:00,  9.09it/s]

{'ner': 113.64501056276436}


100%|██████████| 10/10 [00:01<00:00,  8.42it/s]
 10%|█         | 1/10 [00:00<00:01,  8.47it/s]

{'ner': 107.01935735252187}


100%|██████████| 10/10 [00:01<00:00,  8.14it/s]
 10%|█         | 1/10 [00:00<00:01,  8.70it/s]

{'ner': 122.35817433125521}


100%|██████████| 10/10 [00:01<00:00,  7.99it/s]
 10%|█         | 1/10 [00:00<00:00,  9.35it/s]

{'ner': 120.18711045027158}


100%|██████████| 10/10 [00:01<00:00,  8.13it/s]
 10%|█         | 1/10 [00:00<00:01,  7.46it/s]

{'ner': 112.0676613265025}


100%|██████████| 10/10 [00:01<00:00,  7.93it/s]
 10%|█         | 1/10 [00:00<00:01,  8.55it/s]

{'ner': 122.89481551971403}


100%|██████████| 10/10 [00:01<00:00,  7.86it/s]
 10%|█         | 1/10 [00:00<00:01,  7.63it/s]

{'ner': 122.11534005583053}


100%|██████████| 10/10 [00:01<00:00,  8.02it/s]
 10%|█         | 1/10 [00:00<00:01,  7.81it/s]

{'ner': 110.97311367862858}


100%|██████████| 10/10 [00:01<00:00,  7.12it/s]
 10%|█         | 1/10 [00:00<00:01,  7.25it/s]

{'ner': 111.06627882132307}


100%|██████████| 10/10 [00:01<00:00,  7.49it/s]
 10%|█         | 1/10 [00:00<00:01,  7.09it/s]

{'ner': 111.7580337523832}


100%|██████████| 10/10 [00:01<00:00,  7.22it/s]
 10%|█         | 1/10 [00:00<00:01,  7.51it/s]

{'ner': 113.91889419275685}


100%|██████████| 10/10 [00:01<00:00,  7.08it/s]
 10%|█         | 1/10 [00:00<00:01,  5.88it/s]

{'ner': 116.98522335774146}


100%|██████████| 10/10 [00:01<00:00,  7.17it/s]
 10%|█         | 1/10 [00:00<00:01,  6.85it/s]

{'ner': 110.87644807695779}


100%|██████████| 10/10 [00:01<00:00,  7.09it/s]
 10%|█         | 1/10 [00:00<00:01,  7.87it/s]

{'ner': 107.51825498542894}


100%|██████████| 10/10 [00:01<00:00,  7.22it/s]
 10%|█         | 1/10 [00:00<00:01,  7.46it/s]

{'ner': 119.67822703096817}


100%|██████████| 10/10 [00:01<00:00,  6.95it/s]
 10%|█         | 1/10 [00:00<00:01,  6.25it/s]

{'ner': 123.18860820445752}


100%|██████████| 10/10 [00:01<00:00,  7.11it/s]
 10%|█         | 1/10 [00:00<00:01,  6.94it/s]

{'ner': 119.55057143645536}


100%|██████████| 10/10 [00:01<00:00,  6.78it/s]
 10%|█         | 1/10 [00:00<00:01,  7.54it/s]

{'ner': 120.18264544114936}


100%|██████████| 10/10 [00:01<00:00,  6.85it/s]
 10%|█         | 1/10 [00:00<00:01,  7.30it/s]

{'ner': 115.18273147777654}


100%|██████████| 10/10 [00:01<00:00,  6.35it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 115.63722243190205}


100%|██████████| 10/10 [00:01<00:00,  6.59it/s]
 10%|█         | 1/10 [00:00<00:01,  6.62it/s]

{'ner': 124.36944334451982}


100%|██████████| 10/10 [00:01<00:00,  6.85it/s]
 10%|█         | 1/10 [00:00<00:01,  8.34it/s]

{'ner': 126.88150042132474}


100%|██████████| 10/10 [00:01<00:00,  6.62it/s]
 10%|█         | 1/10 [00:00<00:01,  6.37it/s]

{'ner': 118.96663575083949}


100%|██████████| 10/10 [00:01<00:00,  6.55it/s]
 10%|█         | 1/10 [00:00<00:01,  7.28it/s]

{'ner': 119.5635392122058}


100%|██████████| 10/10 [00:01<00:00,  6.66it/s]
 10%|█         | 1/10 [00:00<00:01,  7.21it/s]

{'ner': 120.62555622775835}


100%|██████████| 10/10 [00:01<00:00,  6.61it/s]
 10%|█         | 1/10 [00:00<00:01,  6.58it/s]

{'ner': 124.43068919115467}


100%|██████████| 10/10 [00:01<00:00,  6.60it/s]
 10%|█         | 1/10 [00:00<00:01,  6.21it/s]

{'ner': 117.26769625313958}


100%|██████████| 10/10 [00:01<00:00,  6.36it/s]
 10%|█         | 1/10 [00:00<00:01,  6.94it/s]

{'ner': 115.42536027026654}


100%|██████████| 10/10 [00:01<00:00,  6.54it/s]
 10%|█         | 1/10 [00:00<00:01,  6.71it/s]

{'ner': 124.58355655874175}


100%|██████████| 10/10 [00:01<00:00,  6.39it/s]
 10%|█         | 1/10 [00:00<00:01,  6.25it/s]

{'ner': 125.86522817256537}


100%|██████████| 10/10 [00:01<00:00,  6.00it/s]
 10%|█         | 1/10 [00:00<00:01,  5.98it/s]

{'ner': 120.20871251695203}


100%|██████████| 10/10 [00:01<00:00,  6.43it/s]
 10%|█         | 1/10 [00:00<00:01,  5.41it/s]

{'ner': 107.93801036335935}


100%|██████████| 10/10 [00:01<00:00,  6.42it/s]
 10%|█         | 1/10 [00:00<00:01,  6.18it/s]

{'ner': 110.22570857660321}


100%|██████████| 10/10 [00:01<00:00,  6.28it/s]
 10%|█         | 1/10 [00:00<00:01,  5.95it/s]

{'ner': 119.48355421268207}


100%|██████████| 10/10 [00:01<00:00,  6.23it/s]
 10%|█         | 1/10 [00:00<00:01,  6.80it/s]

{'ner': 117.37125643896525}


100%|██████████| 10/10 [00:01<00:00,  6.10it/s]
 10%|█         | 1/10 [00:00<00:01,  6.21it/s]

{'ner': 115.30861698428635}


100%|██████████| 10/10 [00:01<00:00,  6.29it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 116.08800021958996}


100%|██████████| 10/10 [00:01<00:00,  5.91it/s]
 10%|█         | 1/10 [00:00<00:01,  6.45it/s]

{'ner': 110.87621792778373}


100%|██████████| 10/10 [00:01<00:00,  6.05it/s]
 10%|█         | 1/10 [00:00<00:01,  6.49it/s]

{'ner': 113.60915938258222}


100%|██████████| 10/10 [00:01<00:00,  5.94it/s]
 10%|█         | 1/10 [00:00<00:01,  5.71it/s]

{'ner': 119.72772405555017}


100%|██████████| 10/10 [00:01<00:00,  5.70it/s]
 10%|█         | 1/10 [00:00<00:01,  5.99it/s]

{'ner': 118.44960798300508}


100%|██████████| 10/10 [00:01<00:00,  5.85it/s]
 10%|█         | 1/10 [00:00<00:01,  6.17it/s]

{'ner': 120.34862829440999}


100%|██████████| 10/10 [00:01<00:00,  5.69it/s]
 10%|█         | 1/10 [00:00<00:01,  6.62it/s]

{'ner': 106.21659434377943}


100%|██████████| 10/10 [00:01<00:00,  5.91it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 109.94940022137416}


100%|██████████| 10/10 [00:01<00:00,  5.81it/s]
 10%|█         | 1/10 [00:00<00:01,  5.56it/s]

{'ner': 109.4877945204571}


100%|██████████| 10/10 [00:01<00:00,  6.15it/s]
 10%|█         | 1/10 [00:00<00:01,  5.56it/s]

{'ner': 121.81602142550264}


100%|██████████| 10/10 [00:01<00:00,  6.00it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 122.47151275647047}


100%|██████████| 10/10 [00:01<00:00,  5.92it/s]
 10%|█         | 1/10 [00:00<00:01,  6.14it/s]

{'ner': 112.43876494088909}


100%|██████████| 10/10 [00:01<00:00,  5.76it/s]
 10%|█         | 1/10 [00:00<00:01,  6.13it/s]

{'ner': 120.01948077022098}


100%|██████████| 10/10 [00:01<00:00,  5.43it/s]
 10%|█         | 1/10 [00:00<00:01,  5.75it/s]

{'ner': 106.76648850554193}


100%|██████████| 10/10 [00:01<00:00,  5.80it/s]
 10%|█         | 1/10 [00:00<00:01,  5.56it/s]

{'ner': 128.3812804595491}


100%|██████████| 10/10 [00:01<00:00,  5.76it/s]
 10%|█         | 1/10 [00:00<00:01,  5.71it/s]

{'ner': 109.46003093755098}


100%|██████████| 10/10 [00:01<00:00,  5.49it/s]
 10%|█         | 1/10 [00:00<00:01,  5.74it/s]

{'ner': 120.46937920562414}


100%|██████████| 10/10 [00:01<00:00,  5.78it/s]
 10%|█         | 1/10 [00:00<00:01,  6.22it/s]

{'ner': 122.42136321541602}


100%|██████████| 10/10 [00:01<00:00,  5.69it/s]
 10%|█         | 1/10 [00:00<00:01,  5.99it/s]

{'ner': 116.10073392265099}


100%|██████████| 10/10 [00:01<00:00,  5.91it/s]
 10%|█         | 1/10 [00:00<00:01,  5.71it/s]

{'ner': 112.98717784931887}


100%|██████████| 10/10 [00:01<00:00,  5.86it/s]
 10%|█         | 1/10 [00:00<00:01,  6.38it/s]

{'ner': 114.92160447141737}


100%|██████████| 10/10 [00:01<00:00,  5.75it/s]
 10%|█         | 1/10 [00:00<00:01,  6.15it/s]

{'ner': 128.06186720610276}


100%|██████████| 10/10 [00:01<00:00,  5.39it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 119.14936598037457}


100%|██████████| 10/10 [00:01<00:00,  5.72it/s]
 10%|█         | 1/10 [00:00<00:01,  6.06it/s]

{'ner': 130.1414676598506}


100%|██████████| 10/10 [00:01<00:00,  5.82it/s]
 10%|█         | 1/10 [00:00<00:01,  6.29it/s]

{'ner': 115.78997789890491}


100%|██████████| 10/10 [00:01<00:00,  5.29it/s]
 10%|█         | 1/10 [00:00<00:01,  5.46it/s]

{'ner': 118.821108094191}


100%|██████████| 10/10 [00:01<00:00,  5.70it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 128.49963829864282}


100%|██████████| 10/10 [00:01<00:00,  5.60it/s]
 10%|█         | 1/10 [00:00<00:01,  5.95it/s]

{'ner': 121.9832441713661}


100%|██████████| 10/10 [00:01<00:00,  5.58it/s]
 10%|█         | 1/10 [00:00<00:01,  5.52it/s]

{'ner': 115.0687768268881}


100%|██████████| 10/10 [00:01<00:00,  5.58it/s]
 10%|█         | 1/10 [00:00<00:01,  5.85it/s]

{'ner': 112.77089593733399}


100%|██████████| 10/10 [00:01<00:00,  5.67it/s]
 10%|█         | 1/10 [00:00<00:01,  5.50it/s]

{'ner': 114.68788417827454}


100%|██████████| 10/10 [00:01<00:00,  5.31it/s]
 10%|█         | 1/10 [00:00<00:01,  6.02it/s]

{'ner': 115.6650888066215}


100%|██████████| 10/10 [00:01<00:00,  5.67it/s]
 10%|█         | 1/10 [00:00<00:01,  6.54it/s]

{'ner': 118.72295303581632}


100%|██████████| 10/10 [00:01<00:00,  5.73it/s]
 10%|█         | 1/10 [00:00<00:01,  5.56it/s]

{'ner': 122.35135126395653}


100%|██████████| 10/10 [00:01<00:00,  5.63it/s]
 10%|█         | 1/10 [00:00<00:01,  5.71it/s]

{'ner': 130.08897113171406}


100%|██████████| 10/10 [00:01<00:00,  5.59it/s]
 10%|█         | 1/10 [00:00<00:01,  5.80it/s]

{'ner': 112.08346906508541}


100%|██████████| 10/10 [00:01<00:00,  5.51it/s]
 10%|█         | 1/10 [00:00<00:01,  6.35it/s]

{'ner': 115.22583137524998}


100%|██████████| 10/10 [00:01<00:00,  5.57it/s]
 10%|█         | 1/10 [00:00<00:01,  6.02it/s]

{'ner': 111.51338311956249}


100%|██████████| 10/10 [00:01<00:00,  5.57it/s]
 10%|█         | 1/10 [00:00<00:01,  5.49it/s]

{'ner': 116.51902263294369}


100%|██████████| 10/10 [00:01<00:00,  5.47it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 128.62546360883243}


100%|██████████| 10/10 [00:01<00:00,  5.18it/s]
 10%|█         | 1/10 [00:00<00:01,  5.81it/s]

{'ner': 114.98838632280217}


100%|██████████| 10/10 [00:01<00:00,  5.53it/s]
 10%|█         | 1/10 [00:00<00:01,  5.85it/s]

{'ner': 108.390553724249}


100%|██████████| 10/10 [00:01<00:00,  5.51it/s]
 10%|█         | 1/10 [00:00<00:01,  5.58it/s]

{'ner': 101.28408785220381}


100%|██████████| 10/10 [00:01<00:00,  5.35it/s]
 10%|█         | 1/10 [00:00<00:01,  5.59it/s]

{'ner': 122.19780560738445}


100%|██████████| 10/10 [00:01<00:00,  5.49it/s]
 10%|█         | 1/10 [00:00<00:01,  5.43it/s]

{'ner': 109.47780981677715}


100%|██████████| 10/10 [00:01<00:00,  5.42it/s]
 10%|█         | 1/10 [00:00<00:01,  5.59it/s]

{'ner': 119.34277820373882}


100%|██████████| 10/10 [00:01<00:00,  5.42it/s]
  0%|          | 0/10 [00:00<?, ?it/s]

{'ner': 123.1486178680052}


100%|██████████| 10/10 [00:01<00:00,  5.36it/s]
 10%|█         | 1/10 [00:00<00:01,  5.70it/s]

{'ner': 111.19801996909655}


100%|██████████| 10/10 [00:01<00:00,  5.29it/s]
 10%|█         | 1/10 [00:00<00:01,  5.41it/s]

{'ner': 122.00596349374246}


100%|██████████| 10/10 [00:01<00:00,  5.39it/s]
 10%|█         | 1/10 [00:00<00:01,  5.56it/s]

{'ner': 110.00033261038334}


100%|██████████| 10/10 [00:01<00:00,  5.36it/s]

{'ner': 121.39785339080845}





In [8]:
# test the trained model
for text, _ in TRAIN_DATA:
    doc = nlp(text)
    print('Entities', [(ent.text, ent.label_) for ent in doc.ents])

Entities [('09876543', 'BANK_ACCOUNT'), ('13-20-54', 'SORT_CODE'), ('1290 344 456', 'PHONE'), ('helpme@outlook.com', 'EMAIL')]
Entities [('mickeymouse@yahoo.com', 'EMAIL'), ('01480 345 345', 'PHONE')]
Entities [('34-23-12 and contact me', 'SORT_CODE'), ('xyz@googlemail.dk', 'EMAIL')]
Entities [('45674567', 'BANK_ACCOUNT'), ('01345 567567', 'PHONE')]
Entities [('09876543', 'BANK_ACCOUNT'), ('11-01-45', 'SORT_CODE')]
Entities [('33-34-35', 'SORT_CODE'), ('33445566', 'BANK_ACCOUNT')]
Entities [('22-23-71', 'SORT_CODE'), ('Daisy@googlemail.co.uk', 'EMAIL'), ('44435987', 'BANK_ACCOUNT')]
Entities [('09780909', 'BANK_ACCOUNT'), ('12-34-56', 'SORT_CODE')]
Entities [('Guffy@yahoo.com', 'EMAIL'), ('01346 022 773', 'PHONE')]
Entities [('abc@hotmail.co.uk', 'EMAIL'), ('07726 000 123', 'PHONE')]


In [7]:
new_chat_test_doc = nlp(chat_text)
displacy.serve(new_chat_test_doc, style='ent')

  "__main__", mod_spec)



Using the 'ent' visualizer
Serving on http://0.0.0.0:5000 ...

Shutting down server on port 5000.
