# Notebook Overview

This notebook demonstrates how to use the trained model: it predicts the named entities for one scenario from the labeled scenario dataset and prints the predicted labels next to the expected labels for comparison.

In [2]:
from spacy.training import offsets_to_biluo_tags, biluo_to_iob
import spacy

# Load the spaCy English pipeline; eval_scenario() uses it to split the
# scenario text into tokens.
SPACY_MODEL_NAME = "en_core_web_sm"
nlp_parser = spacy.load(SPACY_MODEL_NAME)

def eval_scenario(scenario, nlp):
    '''
    Print a table with each word, its true label, and its predicted label.

    Input:
    @param scenario: A scenario from the loaded dataset. The 'text', 'words'
                     and 'codes' entries are read.
    @param nlp: A pipeline generated from pipeline(). Each prediction it
                returns must provide 'start', 'end' and 'entity_group'.

    Output: None. The table is printed to stdout.
    '''
    from itertools import zip_longest

    text = scenario['text']

    # expected label for each word
    true_label = scenario['codes']

    # predict the named entities and keep them as (start, end, label) spans
    entity_triples = [
        (entity['start'], entity['end'], entity['entity_group'])
        for entity in nlp(text)
    ]

    # convert character-level label spans to BILUO tags; only the token
    # boundaries are needed for that, so run just the tokenizer (make_doc)
    # instead of the full spaCy pipeline
    doc = nlp_parser.make_doc(text)
    biluo_tags = offsets_to_biluo_tags(doc, entity_triples)

    # convert BILUO tags to BIO tags
    pred_label = biluo_to_iob(biluo_tags)

    print(f" {'WORD':<16} {'TRUE LABEL':<16} {'PREDICTION'}")
    print(f"{'-'*48}")

    # The spaCy token count may differ from the dataset's word list. Pad the
    # shorter columns with a visible placeholder rather than raising
    # IndexError part-way through the table or silently dropping rows.
    rows = zip_longest(scenario['words'], true_label, pred_label,
                       fillvalue='<missing>')
    for word, expected, predicted in rows:
        print(f" {word:<16} {expected:<16} {predicted}")

In [6]:
import json

from transformers import AutoTokenizer, AutoModelForTokenClassification
from transformers import pipeline

# load the scenario data; the context manager closes the file as soon as the
# JSON has been parsed, and the encoding is fixed so the result does not
# depend on the platform default
with open('../datasets/scenarios-labeled.json', 'r', encoding='utf-8') as scenario_file:
    dataset = json.load(scenario_file)
scenarios = list(dataset.values())

# instantiate the tokenizer and model, setup pipeline
model_path = './bert-finetuned-ner/checkpoint-300'
tokenizer = AutoTokenizer.from_pretrained(model_path)
# NOTE(review): ignore_mismatched_sizes=True presumably lets loading continue
# when checkpoint weight shapes do not match the model config, which could
# hide a broken checkpoint -- confirm it is still needed here.
model = AutoModelForTokenClassification.from_pretrained(model_path, ignore_mismatched_sizes=True)
# eval_scenario() reads 'start', 'end' and 'entity_group' from each prediction
# this pipeline returns
nlp = pipeline('ner', model=model, tokenizer=tokenizer, aggregation_strategy='first')

In [7]:
# Example prediction: print the expected and predicted labels for the first
# scenario in the dataset.
first_scenario = scenarios[0]
eval_scenario(first_scenario, nlp)

 WORD             TRUE LABEL       PREDICTION
------------------------------------------------
 From             O                O
 this             O                O
 screen           O                O
 ,                O                O
 I                O                O
 like             O                O
 to               O                O
 search           O                O
 for              O                O
 anything         O                O
 from             O                O
 recipes          B-SIM            B-SIM
 ,                O                O
 to               O                O
 home             B-SIM            B-SIM
 decor            I-SIM            I-SIM
 ,                O                O
 to               O                O
 people           B-SIM            B-SIM
 ,                O                O
 etc              O                O
 .                O                O
 ,                O                O
 just             O                O
 