# Notebook Overview

This notebook demonstrates example usage of the trained model: it predicts the named entities for one labeled scenario taken from the test split of the scenario dataset, then compares the predicted labels with the expected labels.

In [1]:
from spacy.training import offsets_to_biluo_tags, biluo_to_iob
import spacy, json

# Load the spaCy English model "en_core_web_sm". eval_scenario runs each
# scenario's text through it so that character-level entity spans can be
# mapped onto tokens.
nlp_parser = spacy.load("en_core_web_sm")

def eval_scenario(scenario, nlp, output=False):
    """Return the expected and predicted BIO tags for one labeled scenario.

    Args:
        scenario: mapping with a 'text' string and a 'tokens' list of
            sentences; each sentence is a list of token records whose
            element at index 2 is the expected tag.
        nlp: callable applied to the scenario text. It must return an
            iterable of mappings with 'start' and 'end' character offsets
            and an 'entity_group' label.
        output: currently unused; kept so existing callers keep working.

    Returns:
        A (y_true, y_pred) tuple. y_true holds one expected tag per token
        record in scenario['tokens']; y_pred holds one predicted tag per
        token produced by the module-level spaCy tokenizer.

    Note:
        The two lists only line up index-by-index when the dataset tokens
        and the spaCy tokens agree -- TODO confirm for the dataset in use.
        spaCy emits '-' for predicted spans that do not fall on token
        boundaries, and that marker is passed through unchanged.
    """
    # expected tag of every token, flattened across sentences
    y_true = [token[2] for sent in scenario['tokens'] for token in sent]

    text = scenario['text']

    # predicted entities as (start_char, end_char, label) spans
    entity_spans = [
        (entity['start'], entity['end'], entity['entity_group'])
        for entity in nlp(text)
    ]

    # Only token boundaries are needed to align the character spans, so
    # tokenize with make_doc instead of running every pipeline component.
    doc = nlp_parser.make_doc(text)

    # convert character-level label spans to BILUO tags
    biluo_tags = offsets_to_biluo_tags(doc, entity_spans)

    # convert BILUO tags to BIO tags
    y_pred = biluo_to_iob(biluo_tags)

    return y_true, y_pred

In [17]:
from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

# load the scenario data; the context manager closes the file handle as soon
# as parsing is done, and the JSON text is decoded as UTF-8 regardless of the
# OS locale
with open('../models/scenarios-training-N.json', 'r', encoding='utf-8') as dataset_file:
    dataset = json.load(dataset_file)
scenarios = dataset['test']

# instantiate the tokenizer and model from the checkpoint directory, then
# wrap them in a 'ner' pipeline
model_path = './bert-finetuned-ner/checkpoint-200'
tokenizer = AutoTokenizer.from_pretrained(model_path)
# NOTE(review): ignore_mismatched_sizes=True lets loading continue when the
# checkpoint weights do not match the model's shapes; presumably it is not
# needed for this checkpoint -- confirm, since a mismatch would go unnoticed.
model = AutoModelForTokenClassification.from_pretrained(model_path, ignore_mismatched_sizes=True)
# eval_scenario reads 'start', 'end' and 'entity_group' from each prediction
# this pipeline returns
nlp = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy='first')

In [18]:
# Worked example: run the model on the first test scenario and list every
# word next to its expected and predicted tag.

scenario = scenarios[0]
y_true, y_pred = eval_scenario(scenario, nlp)

print(f" {'WORD':<16} {'TRUE LABEL':<16} {'PREDICTION'}")
print(f"{'-'*48}")

# flatten the per-sentence token lists so one index addresses all columns
words = [token[0] for sent in scenario['tokens'] for token in sent]
for i, word in enumerate(words):
    print(f" {word:<16} {y_true[i]:<16} {y_pred[i]}")

 WORD             TRUE LABEL       PREDICTION
------------------------------------------------
 I                O                O
 get              O                O
 to               O                O
 this             O                O
 screen           O                O
 by               O                O
 opening          O                O
 the              O                O
 app              O                O
 and              O                O
 clicking         O                O
 on               O                O
 the              O                O
 side             O                O
 icon             O                O
 then             O                O
 clicking         O                O
 transaction      B-SIM            B-SIM
 history          I-SIM            I-SIM
 .                O                O
 I                O                O
 use              O                O
 this             O                O
 screen           O                O
 to      