## Importing Training Data

In [19]:
import json

# Read the annotated examples. The encoding is pinned to UTF-8 (the encoding JSON
# is specified in) instead of relying on the platform default: decoding non-ASCII
# text with the wrong codec changes the string length, which would shift the
# character offsets used for the entity spans below.
with open('test.json', 'r', encoding='utf-8') as f:
    data = json.load(f)


# Reshape every example into the record used by the conversion step:
#   {'text': <raw sentence>, 'entities': [(start, end, LABEL), ...]}
training_data = []
for example in data['examples']:
    text = example['content']
    entities = [
        (
            annotation['start'],
            # The annotation's 'end' appears to be inclusive (e.g. "Schedule" ends
            # up as (0, 8)); + 1 turns it into an exclusive end offset.
            annotation['end'] + 1,
            # Labels are normalised to upper case.
            annotation['tag_name'].upper(),
        )
        for annotation in example['annotations']
    ]
    training_data.append({'text': text, 'entities': entities})
print(training_data[0])

{'text': 'Schedule a calendar event in Teak oaks HOA about competitions happening tomorrow', 'entities': [(0, 8, 'ACTION'), (11, 25, 'DOMAIN'), (29, 42, 'HOA'), (49, 71, 'EVENT')]}


## Converting training data to spaCy DocBin format

In [20]:
import spacy
from spacy.tokens import DocBin
from tqdm import tqdm
from spacy.util import filter_spans


def _example_to_doc(pipeline, example):
    """Turn one training record into a spaCy Doc with its entities set.

    `example` is a dict with a 'text' string and an 'entities' list of
    (start, end, label) character-offset triples. Offsets that cannot be
    mapped onto a token span are reported and dropped.
    """
    text, labels = example['text'], example['entities']
    doc = pipeline.make_doc(text)
    spans = []
    for start, end, label in labels:
        # "contract" snaps the character range inward to token boundaries;
        # char_span returns None when no token span can be formed.
        span = doc.char_span(start, end, label=label, alignment_mode="contract")
        if span is not None:
            spans.append(span)
        else:
            print("Skipping entity")
    # doc.ents cannot hold overlapping spans, so overlaps are filtered out first.
    doc.ents = filter_spans(spans)
    return doc


# A blank English pipeline is enough here: only its tokenizer is needed.
nlp = spacy.blank('en')
doc_bin = DocBin()
for training_example in tqdm(training_data):
    doc_bin.add(_example_to_doc(nlp, training_example))

# Serialize all annotated docs into the binary format read by `spacy train`.
doc_bin.to_disk("train.spacy")

100%|██████████| 7/7 [00:00<00:00, 3749.22it/s]


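Execute the following commands in a terminal. The first fills in `base_config.cfg` to produce the full `config.cfg`; the second trains the model and writes the resulting pipelines (including `model-best`, loaded in the next cell) to the current directory. Note that the second command reuses `train.spacy` as the dev set, so the reported scores are not a held-out evaluation.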

python -m spacy init fill-config base_config.cfg config.cfg

python -m spacy train config.cfg --output ./ --paths.train ./train.spacy --paths.dev ./train.spacy

In [25]:
# Load the trained pipeline from the "model-best" directory
# (presumably the one produced by the `spacy train` command above).
nlp_ner = spacy.load("model-best")

# Sentence used to try out the fine-tuned model.
sample_text = "Could you please reserve a team brainstorming session on coming Wednesday at 11 AM?"
doc = nlp_ner(sample_text)

# Visualise the entities the model predicted for the sentence.
spacy.displacy.render(doc, style="ent")